parent
1b00a3ed54
commit
31c8b527c6
64 changed files with 6425 additions and 4476 deletions
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,13 @@ |
||||
# Build script for the gpuarithm module.

# On Android and iOS builds the module is handed to ocv_module_disable().
if(ANDROID OR IOS)
  ocv_module_disable(gpuarithm)
endif()

# Human-readable module description.
set(the_description "GPU-accelerated Operations on Matrices")

# Pass -Wundef and -Wmissing-declarations to ocv_warnings_disable() for CMAKE_CXX_FLAGS.
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations)

# Define the module; opencv_core is listed after the module name
# (presumably as its dependency -- confirm against ocv_define_module).
ocv_define_module(gpuarithm opencv_core)

# Add cuBLAS to the module target only when HAVE_CUBLAS is set.
if(HAVE_CUBLAS)
  CUDA_ADD_CUBLAS_TO_TARGET(${the_module})
endif()
@ -0,0 +1,10 @@ |
||||
******************************************* |
||||
gpu. GPU-accelerated Operations on Matrices |
||||
******************************************* |
||||
|
||||
.. toctree:: |
||||
:maxdepth: 1 |
||||
|
||||
operations_on_matrices |
||||
per_element_operations |
||||
matrix_reductions |
@ -0,0 +1,279 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_GPUARITHM_HPP__ |
||||
#define __OPENCV_GPUARITHM_HPP__ |
||||
|
||||
#include "opencv2/core/gpumat.hpp" |
||||
|
||||
namespace cv { namespace gpu { |
||||
|
||||
//! adds one matrix to another (c = a + b)
|
||||
CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); |
||||
//! adds scalar to a matrix (c = a + s)
|
||||
CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); |
||||
|
||||
//! subtracts one matrix from another (c = a - b)
|
||||
CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); |
||||
//! subtracts scalar from a matrix (c = a - s)
|
||||
CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes element-wise weighted product of the two arrays (c = scale * a * b)
|
||||
CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); |
||||
//! computes weighted product of a matrix and a scalar (c = scale * a * s)
|
||||
CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes element-wise weighted quotient of the two arrays (c = scale * a / b)
|
||||
CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); |
||||
//! computes element-wise weighted quotient of matrix and scalar (c = scale * a / s)
|
||||
CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); |
||||
//! computes element-wise weighted reciprocal of an array (c = scale / b)
|
||||
CV_EXPORTS void divide(double scale, const GpuMat& b, GpuMat& c, int dtype = -1, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma)
|
||||
CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, |
||||
int dtype = -1, Stream& stream = Stream::Null()); |
||||
|
||||
//! adds scaled array to another one (dst = alpha*src1 + src2)
|
||||
static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()) |
||||
{ |
||||
addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); |
||||
} |
||||
|
||||
//! computes element-wise absolute difference of two arrays (c = abs(a - b))
|
||||
CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null()); |
||||
//! computes element-wise absolute difference of array and scalar (c = abs(a - s))
|
||||
CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes absolute value of each matrix element
|
||||
//! supports CV_16S and CV_32F depth
|
||||
CV_EXPORTS void abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes square of each pixel in an image
|
||||
//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
|
||||
CV_EXPORTS void sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes square root of each pixel in an image
|
||||
//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
|
||||
CV_EXPORTS void sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes exponent of each matrix element (b = e**a)
|
||||
//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
|
||||
CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
|
||||
//! supports CV_8U, CV_16U, CV_16S and CV_32F depth
|
||||
CV_EXPORTS void log(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes power of each matrix element:
|
||||
//!   dst(i,j) = pow(src(i,j), power), if src.type() is integer
|
||||
//!   dst(i,j) = pow(fabs(src(i,j)), power), otherwise
|
||||
//! supports all, except depth == CV_64F
|
||||
CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! compares elements of two arrays (c = a <cmpop> b)
|
||||
CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); |
||||
CV_EXPORTS void compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); |
||||
|
||||
//! performs per-element bit-wise inversion
|
||||
CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); |
||||
|
||||
//! calculates per-element bit-wise disjunction of two arrays
|
||||
CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); |
||||
//! calculates per-element bit-wise disjunction of array and scalar
|
||||
//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
|
||||
CV_EXPORTS void bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! calculates per-element bit-wise conjunction of two arrays
|
||||
CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); |
||||
//! calculates per-element bit-wise conjunction of array and scalar
|
||||
//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
|
||||
CV_EXPORTS void bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! calculates per-element bit-wise "exclusive or" operation
|
||||
CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); |
||||
//! calculates per-element bit-wise "exclusive or" of array and scalar
|
||||
//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
|
||||
CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! pixel by pixel right shift of an image by a constant value
|
||||
//! supports 1, 3 and 4 channels images with integer elements
|
||||
CV_EXPORTS void rshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! pixel by pixel left shift of an image by a constant value
|
||||
//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth
|
||||
CV_EXPORTS void lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes per-element minimum of two arrays (dst = min(src1, src2))
|
||||
CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes per-element minimum of array and scalar (dst = min(src1, src2))
|
||||
CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes per-element maximum of two arrays (dst = max(src1, src2))
|
||||
CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes per-element maximum of array and scalar (dst = max(src1, src2))
|
||||
CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! implements generalized matrix product algorithm GEMM from BLAS
|
||||
CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, |
||||
const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); |
||||
|
||||
//! transposes the matrix
|
||||
//! supports matrix with element size = 1, 4 and 8 bytes (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc)
|
||||
CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! reverses the order of the rows, columns or both in a matrix
|
||||
//! supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or CV_32F depth
|
||||
CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode, Stream& stream = Stream::Null()); |
||||
|
||||
//! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))
|
||||
//! destination array will have the same depth as lut and the same number of channels as source
|
||||
//! supports CV_8UC1, CV_8UC3 types
|
||||
CV_EXPORTS void LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! makes multi-channel array out of several single-channel arrays
|
||||
CV_EXPORTS void merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! makes multi-channel array out of several single-channel arrays
|
||||
CV_EXPORTS void merge(const std::vector<GpuMat>& src, GpuMat& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! copies each plane of a multi-channel array to a dedicated array
|
||||
CV_EXPORTS void split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! copies each plane of a multi-channel array to a dedicated array
|
||||
CV_EXPORTS void split(const GpuMat& src, std::vector<GpuMat>& dst, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes magnitude of complex (x(i).re, x(i).im) vector
|
||||
//! supports only CV_32FC2 type
|
||||
CV_EXPORTS void magnitude(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes squared magnitude of complex (x(i).re, x(i).im) vector
|
||||
//! supports only CV_32FC2 type
|
||||
CV_EXPORTS void magnitudeSqr(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes magnitude of each (x(i), y(i)) vector
|
||||
//! supports only floating-point source
|
||||
CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes squared magnitude of each (x(i), y(i)) vector
|
||||
//! supports only floating-point source
|
||||
CV_EXPORTS void magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null()); |
||||
|
||||
//! computes angle (angle(i)) of each (x(i), y(i)) vector
|
||||
//! supports only floating-point source
|
||||
CV_EXPORTS void phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); |
||||
|
||||
//! converts Cartesian coordinates to polar
|
||||
//! supports only floating-point source
|
||||
CV_EXPORTS void cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); |
||||
|
||||
//! converts polar coordinates to Cartesian
|
||||
//! supports only floating-point source
|
||||
CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees = false, Stream& stream = Stream::Null()); |
||||
|
||||
//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values
|
||||
CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, |
||||
int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat()); |
||||
CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double a, double b, |
||||
int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf); |
||||
|
||||
//! computes mean value and standard deviation of all or selected array elements
|
||||
//! supports only CV_8UC1 type
|
||||
CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev); |
||||
//! buffered version
|
||||
CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf); |
||||
|
||||
//! computes norm of array
|
||||
//! supports NORM_INF, NORM_L1, NORM_L2
|
||||
//! supports all matrices except 64F
|
||||
CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2); |
||||
CV_EXPORTS double norm(const GpuMat& src1, int normType, GpuMat& buf); |
||||
CV_EXPORTS double norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf); |
||||
|
||||
//! computes norm of the difference between two arrays
|
||||
//! supports NORM_INF, NORM_L1, NORM_L2
|
||||
//! supports only CV_8UC1 type
|
||||
CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2); |
||||
|
||||
//! computes sum of array elements
|
||||
//! supports only single channel images
|
||||
CV_EXPORTS Scalar sum(const GpuMat& src); |
||||
CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf); |
||||
CV_EXPORTS Scalar sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); |
||||
|
||||
//! computes sum of array elements absolute values
|
||||
//! supports only single channel images
|
||||
CV_EXPORTS Scalar absSum(const GpuMat& src); |
||||
CV_EXPORTS Scalar absSum(const GpuMat& src, GpuMat& buf); |
||||
CV_EXPORTS Scalar absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); |
||||
|
||||
//! computes squared sum of array elements
|
||||
//! supports only single channel images
|
||||
CV_EXPORTS Scalar sqrSum(const GpuMat& src); |
||||
CV_EXPORTS Scalar sqrSum(const GpuMat& src, GpuMat& buf); |
||||
CV_EXPORTS Scalar sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); |
||||
|
||||
//! finds global minimum and maximum array elements and returns their values
|
||||
CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat()); |
||||
CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf); |
||||
|
||||
//! finds global minimum and maximum array elements and returns their values with locations
|
||||
CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, |
||||
const GpuMat& mask=GpuMat()); |
||||
CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, |
||||
const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf); |
||||
|
||||
//! counts non-zero array elements
|
||||
CV_EXPORTS int countNonZero(const GpuMat& src); |
||||
CV_EXPORTS int countNonZero(const GpuMat& src, GpuMat& buf); |
||||
|
||||
//! reduces a matrix to a vector
|
||||
CV_EXPORTS void reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null()); |
||||
|
||||
//! applies fixed threshold to the image
|
||||
CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()); |
||||
|
||||
}} // namespace cv { namespace gpu {
|
||||
|
||||
#endif /* __OPENCV_GPUARITHM_HPP__ */ |
@ -0,0 +1,47 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp" |
||||
|
||||
using namespace perf; |
||||
|
||||
CV_PERF_TEST_MAIN(gpuarithm, printCudaInfo()) |
@ -0,0 +1,43 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp" |
@ -0,0 +1,64 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifdef __GNUC__ |
||||
# pragma GCC diagnostic ignored "-Wmissing-declarations" |
||||
# if defined __clang__ || defined __APPLE__ |
||||
# pragma GCC diagnostic ignored "-Wmissing-prototypes" |
||||
# pragma GCC diagnostic ignored "-Wextra" |
||||
# endif |
||||
#endif |
||||
|
||||
#ifndef __OPENCV_PERF_PRECOMP_HPP__ |
||||
#define __OPENCV_PERF_PRECOMP_HPP__ |
||||
|
||||
#include "opencv2/ts.hpp" |
||||
#include "opencv2/ts/gpu_perf.hpp" |
||||
|
||||
#include "opencv2/core.hpp" |
||||
#include "opencv2/gpuarithm.hpp" |
||||
|
||||
#ifdef GTEST_CREATE_SHARED_LIBRARY |
||||
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined |
||||
#endif |
||||
|
||||
#endif |
@ -0,0 +1,147 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
struct VAbsDiff4 : binary_function<uint, uint, uint> |
||||
{ |
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const |
||||
{ |
||||
return vabsdiff4(a, b); |
||||
} |
||||
|
||||
__device__ __forceinline__ VAbsDiff4() {} |
||||
__device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {} |
||||
}; |
||||
|
||||
struct VAbsDiff2 : binary_function<uint, uint, uint> |
||||
{ |
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const |
||||
{ |
||||
return vabsdiff2(a, b); |
||||
} |
||||
|
||||
__device__ __forceinline__ VAbsDiff2() {} |
||||
__device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {} |
||||
}; |
||||
|
||||
__device__ __forceinline__ int _abs(int a) |
||||
{ |
||||
return ::abs(a); |
||||
} |
||||
__device__ __forceinline__ float _abs(float a) |
||||
{ |
||||
return ::fabsf(a); |
||||
} |
||||
__device__ __forceinline__ double _abs(double a) |
||||
{ |
||||
return ::fabs(a); |
||||
} |
||||
|
||||
template <typename T> struct AbsDiffMat : binary_function<T, T, T> |
||||
{ |
||||
__device__ __forceinline__ T operator ()(T a, T b) const |
||||
{ |
||||
return saturate_cast<T>(_abs(a - b)); |
||||
} |
||||
|
||||
__device__ __forceinline__ AbsDiffMat() {} |
||||
__device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {} |
||||
}; |
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <> struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> |
||||
{ |
||||
}; |
||||
|
||||
template <> struct TransformFunctorTraits< arithm::VAbsDiff2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T> struct TransformFunctorTraits< arithm::AbsDiffMat<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
void absDiffMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform(src1, src2, dst, VAbsDiff4(), WithOutMask(), stream); |
||||
} |
||||
|
||||
void absDiffMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform(src1, src2, dst, VAbsDiff2(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template <typename T> |
||||
void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, AbsDiffMat<T>(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template void absDiffMat<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absDiffMat<schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absDiffMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absDiffMat<short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absDiffMat<int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absDiffMat<float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absDiffMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,98 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T, typename S> struct AbsDiffScalar : unary_function<T, T> |
||||
{ |
||||
S val; |
||||
|
||||
explicit AbsDiffScalar(S val_) : val(val_) {} |
||||
|
||||
__device__ __forceinline__ T operator ()(T a) const |
||||
{ |
||||
abs_func<S> f; |
||||
return saturate_cast<T>(f(a - val)); |
||||
} |
||||
}; |
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T, typename S> struct TransformFunctorTraits< arithm::AbsDiffScalar<T, S> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T, typename S> |
||||
void absDiffScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
AbsDiffScalar<T, S> op(static_cast<S>(val)); |
||||
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, op, WithOutMask(), stream); |
||||
} |
||||
|
||||
template void absDiffScalar<uchar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absDiffScalar<schar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absDiffScalar<ushort, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absDiffScalar<short, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absDiffScalar<int, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absDiffScalar<float, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absDiffScalar<double, double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,185 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
struct VAdd4 : binary_function<uint, uint, uint> |
||||
{ |
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const |
||||
{ |
||||
return vadd4(a, b); |
||||
} |
||||
|
||||
__device__ __forceinline__ VAdd4() {} |
||||
__device__ __forceinline__ VAdd4(const VAdd4& other) {} |
||||
}; |
||||
|
||||
struct VAdd2 : binary_function<uint, uint, uint> |
||||
{ |
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const |
||||
{ |
||||
return vadd2(a, b); |
||||
} |
||||
|
||||
__device__ __forceinline__ VAdd2() {} |
||||
__device__ __forceinline__ VAdd2(const VAdd2& other) {} |
||||
}; |
||||
|
||||
template <typename T, typename D> struct AddMat : binary_function<T, T, D> |
||||
{ |
||||
__device__ __forceinline__ D operator ()(T a, T b) const |
||||
{ |
||||
return saturate_cast<D>(a + b); |
||||
} |
||||
|
||||
__device__ __forceinline__ AddMat() {} |
||||
__device__ __forceinline__ AddMat(const AddMat& other) {} |
||||
}; |
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <> struct TransformFunctorTraits< arithm::VAdd4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> |
||||
{ |
||||
}; |
||||
|
||||
template <> struct TransformFunctorTraits< arithm::VAdd2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::AddMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
void addMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform(src1, src2, dst, VAdd4(), WithOutMask(), stream); |
||||
} |
||||
|
||||
void addMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform(src1, src2, dst, VAdd2(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template <typename T, typename D> |
||||
void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) |
||||
{ |
||||
if (mask.data) |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), mask, stream); |
||||
else |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template void addMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<uchar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<uchar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<uchar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
template void addMat<schar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<schar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<schar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<schar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<schar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<schar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<schar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void addMat<ushort, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<ushort, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<ushort, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<ushort, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<ushort, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<ushort, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<ushort, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void addMat<short, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<short, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<short, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<short, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<short, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<short, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<short, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void addMat<int, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<int, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<int, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<int, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<int, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<int, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void addMat<float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void addMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,148 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T, typename S, typename D> struct AddScalar : unary_function<T, D> |
||||
{ |
||||
S val; |
||||
|
||||
explicit AddScalar(S val_) : val(val_) {} |
||||
|
||||
__device__ __forceinline__ D operator ()(T a) const |
||||
{ |
||||
return saturate_cast<D>(a + val); |
||||
} |
||||
}; |
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::AddScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T, typename S, typename D> |
||||
void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) |
||||
{ |
||||
AddScalar<T, S, D> op(static_cast<S>(val)); |
||||
|
||||
if (mask.data) |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream); |
||||
else |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream); |
||||
} |
||||
|
||||
template void addScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
template void addScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void addScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void addScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void addScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void addScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void addScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void addScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void addScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,364 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T> struct UseDouble_ |
||||
{ |
||||
enum {value = 0}; |
||||
}; |
||||
template <> struct UseDouble_<double> |
||||
{ |
||||
enum {value = 1}; |
||||
}; |
||||
template <typename T1, typename T2, typename D> struct UseDouble |
||||
{ |
||||
enum {value = (UseDouble_<T1>::value || UseDouble_<T2>::value || UseDouble_<D>::value)}; |
||||
}; |
||||
|
||||
template <typename T1, typename T2, typename D, bool useDouble> struct AddWeighted_; |
||||
template <typename T1, typename T2, typename D> struct AddWeighted_<T1, T2, D, false> : binary_function<T1, T2, D> |
||||
{ |
||||
float alpha; |
||||
float beta; |
||||
float gamma; |
||||
|
||||
AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {} |
||||
|
||||
__device__ __forceinline__ D operator ()(T1 a, T2 b) const |
||||
{ |
||||
return saturate_cast<D>(a * alpha + b * beta + gamma); |
||||
} |
||||
}; |
||||
template <typename T1, typename T2, typename D> struct AddWeighted_<T1, T2, D, true> : binary_function<T1, T2, D> |
||||
{ |
||||
double alpha; |
||||
double beta; |
||||
double gamma; |
||||
|
||||
AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {} |
||||
|
||||
__device__ __forceinline__ D operator ()(T1 a, T2 b) const |
||||
{ |
||||
return saturate_cast<D>(a * alpha + b * beta + gamma); |
||||
} |
||||
}; |
||||
template <typename T1, typename T2, typename D> struct AddWeighted : AddWeighted_<T1, T2, D, UseDouble<T1, T2, D>::value> |
||||
{ |
||||
AddWeighted(double alpha_, double beta_, double gamma_) : AddWeighted_<T1, T2, D, UseDouble<T1, T2, D>::value>(alpha_, beta_, gamma_) {} |
||||
}; |
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T1, typename T2, typename D, size_t src1_size, size_t src2_size, size_t dst_size> struct AddWeightedTraits : DefaultTransformFunctorTraits< arithm::AddWeighted<T1, T2, D> > |
||||
{ |
||||
}; |
||||
template <typename T1, typename T2, typename D, size_t src_size, size_t dst_size> struct AddWeightedTraits<T1, T2, D, src_size, src_size, dst_size> : arithm::ArithmFuncTraits<src_size, dst_size> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T1, typename T2, typename D> struct TransformFunctorTraits< arithm::AddWeighted<T1, T2, D> > : AddWeightedTraits<T1, T2, D, sizeof(T1), sizeof(T2), sizeof(D)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T1, typename T2, typename D> |
||||
void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
AddWeighted<T1, T2, D> op(alpha, beta, gamma); |
||||
|
||||
cudev::transform((PtrStepSz<T1>) src1, (PtrStepSz<T2>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream); |
||||
} |
||||
|
||||
template void addWeighted<uchar, uchar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, uchar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, uchar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, uchar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, uchar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, uchar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, uchar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<uchar, schar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, schar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, schar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, schar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, schar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, schar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, schar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<uchar, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<uchar, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<uchar, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<uchar, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<uchar, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<uchar, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
|
||||
|
||||
template void addWeighted<schar, schar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, schar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, schar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, schar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, schar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, schar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, schar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<schar, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<schar, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<schar, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<schar, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<schar, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<schar, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
|
||||
|
||||
template void addWeighted<ushort, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<ushort, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<ushort, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<ushort, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<ushort, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<ushort, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
|
||||
|
||||
template void addWeighted<short, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<short, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<short, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<short, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<short, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
|
||||
|
||||
template void addWeighted<int, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<int, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<int, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<int, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
|
||||
|
||||
template void addWeighted<float, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<float, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<float, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<float, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<float, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<float, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<float, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void addWeighted<float, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<float, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<float, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<float, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<float, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<float, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<float, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
|
||||
|
||||
template void addWeighted<double, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<double, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<double, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<double, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<double, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<double, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
template void addWeighted<double, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif /* CUDA_DISABLER */ |
@ -0,0 +1,145 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __ARITHM_FUNC_TRAITS_HPP__ |
||||
#define __ARITHM_FUNC_TRAITS_HPP__ |
||||
|
||||
#include <cstddef> |
||||
|
||||
namespace arithm
{
    // Constants shared by every ArithmFuncTraits variant. Both the "simple" and the "smart"
    // block dimensions are always 32 x 8; only smart_shift differs between variants, so it is
    // the single template parameter of this helper.
    // NOTE(review): how these values are consumed is not visible in this header (presumably
    // by the launch code behind TransformFunctorTraits) - confirm against transform.hpp.
    template <int shift_value> struct ArithmFuncTraitsBase
    {
        enum { simple_block_dim_x = 32, simple_block_dim_y = 8 };
        enum { smart_block_dim_x = 32, smart_block_dim_y = 8, smart_shift = shift_value };
    };

    // Generic case: any (src_size, dst_size) pair that has no dedicated specialization below
    // uses smart_shift = 1.
    template <size_t src_size, size_t dst_size> struct ArithmFuncTraits : ArithmFuncTraitsBase<1> {};

    // Every combination of 1-, 2- and 4-byte source / destination sizes uses smart_shift = 4.
    template <> struct ArithmFuncTraits<1, 1> : ArithmFuncTraitsBase<4> {};
    template <> struct ArithmFuncTraits<1, 2> : ArithmFuncTraitsBase<4> {};
    template <> struct ArithmFuncTraits<1, 4> : ArithmFuncTraitsBase<4> {};

    template <> struct ArithmFuncTraits<2, 1> : ArithmFuncTraitsBase<4> {};
    template <> struct ArithmFuncTraits<2, 2> : ArithmFuncTraitsBase<4> {};
    template <> struct ArithmFuncTraits<2, 4> : ArithmFuncTraitsBase<4> {};

    template <> struct ArithmFuncTraits<4, 1> : ArithmFuncTraitsBase<4> {};
    template <> struct ArithmFuncTraits<4, 2> : ArithmFuncTraitsBase<4> {};
    template <> struct ArithmFuncTraits<4, 4> : ArithmFuncTraitsBase<4> {};
}
||||
|
||||
#endif // __ARITHM_FUNC_TRAITS_HPP__
|
@ -0,0 +1,126 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T> struct TransformFunctorTraits< bit_not<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T> struct TransformFunctorTraits< bit_and<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T> struct TransformFunctorTraits< bit_or<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T> struct TransformFunctorTraits< bit_xor<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) |
||||
{ |
||||
if (mask.data) |
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), mask, stream); |
||||
else |
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) |
||||
{ |
||||
if (mask.data) |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), mask, stream); |
||||
else |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) |
||||
{ |
||||
if (mask.data) |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), mask, stream); |
||||
else |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) |
||||
{ |
||||
if (mask.data) |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), mask, stream); |
||||
else |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template void bitMatNot<uchar>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void bitMatNot<ushort>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void bitMatNot<uint>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
template void bitMatAnd<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void bitMatAnd<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void bitMatAnd<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
template void bitMatOr<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void bitMatOr<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void bitMatOr<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
template void bitMatXor<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void bitMatXor<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void bitMatXor<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,104 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< bit_or<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< bit_xor<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T> void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_and<T>(), src2), WithOutMask(), stream); |
||||
} |
||||
|
||||
template <typename T> void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_or<T>(), src2), WithOutMask(), stream); |
||||
} |
||||
|
||||
template <typename T> void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_xor<T>(), src2), WithOutMask(), stream); |
||||
} |
||||
|
||||
template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void bitScalarAnd<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void bitScalarAnd<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void bitScalarAnd<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void bitScalarOr<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void bitScalarOr<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void bitScalarOr<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void bitScalarOr<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void bitScalarXor<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void bitScalarXor<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void bitScalarXor<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void bitScalarXor<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,206 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
struct VCmpEq4 : binary_function<uint, uint, uint> |
||||
{ |
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const |
||||
{ |
||||
return vcmpeq4(a, b); |
||||
} |
||||
|
||||
__device__ __forceinline__ VCmpEq4() {} |
||||
__device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {} |
||||
}; |
||||
struct VCmpNe4 : binary_function<uint, uint, uint> |
||||
{ |
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const |
||||
{ |
||||
return vcmpne4(a, b); |
||||
} |
||||
|
||||
__device__ __forceinline__ VCmpNe4() {} |
||||
__device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {} |
||||
}; |
||||
struct VCmpLt4 : binary_function<uint, uint, uint> |
||||
{ |
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const |
||||
{ |
||||
return vcmplt4(a, b); |
||||
} |
||||
|
||||
__device__ __forceinline__ VCmpLt4() {} |
||||
__device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {} |
||||
}; |
||||
struct VCmpLe4 : binary_function<uint, uint, uint> |
||||
{ |
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const |
||||
{ |
||||
return vcmple4(a, b); |
||||
} |
||||
|
||||
__device__ __forceinline__ VCmpLe4() {} |
||||
__device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {} |
||||
}; |
||||
|
||||
template <class Op, typename T> |
||||
struct Cmp : binary_function<T, T, uchar> |
||||
{ |
||||
__device__ __forceinline__ uchar operator()(T a, T b) const |
||||
{ |
||||
Op op; |
||||
return -op(a, b); |
||||
} |
||||
}; |
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <> struct TransformFunctorTraits< arithm::VCmpEq4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> |
||||
{ |
||||
}; |
||||
template <> struct TransformFunctorTraits< arithm::VCmpNe4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> |
||||
{ |
||||
}; |
||||
template <> struct TransformFunctorTraits< arithm::VCmpLt4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> |
||||
{ |
||||
}; |
||||
template <> struct TransformFunctorTraits< arithm::VCmpLe4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> |
||||
{ |
||||
}; |
||||
|
||||
template <class Op, typename T> struct TransformFunctorTraits< arithm::Cmp<Op, T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream); |
||||
} |
||||
void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream); |
||||
} |
||||
void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream); |
||||
} |
||||
void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template <template <typename> class Op, typename T> |
||||
void cmpMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
Cmp<Op<T>, T> op; |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, dst, op, WithOutMask(), stream); |
||||
} |
||||
|
||||
template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cmpMat<equal_to, T>(src1, src2, dst, stream); |
||||
} |
||||
template <typename T> void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cmpMat<not_equal_to, T>(src1, src2, dst, stream); |
||||
} |
||||
template <typename T> void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cmpMat<less, T>(src1, src2, dst, stream); |
||||
} |
||||
template <typename T> void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cmpMat<less_equal, T>(src1, src2, dst, stream); |
||||
} |
||||
|
||||
template void cmpMatEq<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatEq<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatEq<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatEq<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatEq<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatEq<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatEq<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void cmpMatNe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatNe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatNe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatNe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatNe<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatNe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatNe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void cmpMatLt<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatLt<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatLt<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatLt<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatLt<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatLt<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatLt<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void cmpMatLe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatLe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatLe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatLe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatLe<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatLe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
template void cmpMatLe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,284 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
#include "opencv2/core/cuda/vec_math.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Wraps the comparison functor Op so that it yields a uchar mask value.
// The result of Op is arithmetically negated and then converted to uchar;
// for an Op returning bool this gives 255 for true and 0 for false.
template <class Op, typename T>
struct Cmp : binary_function<T, T, uchar>
{
    __device__ __forceinline__ uchar operator()(T a, T b) const
    {
        Op compare;

        // Negation turns a true result (1) into -1, i.e. all bits set after the uchar conversion.
        return static_cast<uchar>(-compare(a, b));
    }
};
||||
|
||||
#define TYPE_VEC(type, cn) typename TypeVec<type, cn>::vec_type |
||||
|
||||
// Functor comparing every channel of a source element against a fixed value.
// Only the specializations for cn = 1, 2, 3 and 4 are defined.
template <class Op, typename T, int cn> struct CmpScalar;

// Single-channel case: compares the source value with val through Cmp<Op, T>.
template <class Op, typename T>
struct CmpScalar<Op, T, 1> : unary_function<T, uchar>
{
    // Right-hand operand of the comparison, fixed at construction.
    const T val;

    __host__ explicit CmpScalar(T scalar) : val(scalar) {}

    __device__ __forceinline__ uchar operator()(T src) const
    {
        return Cmp<Op, T>()(src, val);
    }
};
||||
// Two-channel case: compares the x and y components of the source element
// with the matching components of val and packs both results into a uchar vector.
template <class Op, typename T>
struct CmpScalar<Op, T, 2> : unary_function<TYPE_VEC(T, 2), TYPE_VEC(uchar, 2)>
{
    // Per-channel right-hand operands, fixed at construction.
    const TYPE_VEC(T, 2) val;

    __host__ explicit CmpScalar(TYPE_VEC(T, 2) scalar) : val(scalar) {}

    __device__ __forceinline__ TYPE_VEC(uchar, 2) operator()(const TYPE_VEC(T, 2) & src) const
    {
        Cmp<Op, T> channelCmp;

        const uchar rx = channelCmp(src.x, val.x);
        const uchar ry = channelCmp(src.y, val.y);

        return VecTraits<TYPE_VEC(uchar, 2)>::make(rx, ry);
    }
};
||||
// Three-channel case: compares the x, y and z components of the source element
// with the matching components of val and packs the results into a uchar vector.
template <class Op, typename T>
struct CmpScalar<Op, T, 3> : unary_function<TYPE_VEC(T, 3), TYPE_VEC(uchar, 3)>
{
    // Per-channel right-hand operands, fixed at construction.
    const TYPE_VEC(T, 3) val;

    __host__ explicit CmpScalar(TYPE_VEC(T, 3) scalar) : val(scalar) {}

    __device__ __forceinline__ TYPE_VEC(uchar, 3) operator()(const TYPE_VEC(T, 3) & src) const
    {
        Cmp<Op, T> channelCmp;

        const uchar rx = channelCmp(src.x, val.x);
        const uchar ry = channelCmp(src.y, val.y);
        const uchar rz = channelCmp(src.z, val.z);

        return VecTraits<TYPE_VEC(uchar, 3)>::make(rx, ry, rz);
    }
};
||||
// Four-channel case: compares the x, y, z and w components of the source element
// with the matching components of val and packs the results into a uchar vector.
template <class Op, typename T>
struct CmpScalar<Op, T, 4> : unary_function<TYPE_VEC(T, 4), TYPE_VEC(uchar, 4)>
{
    // Per-channel right-hand operands, fixed at construction.
    const TYPE_VEC(T, 4) val;

    __host__ explicit CmpScalar(TYPE_VEC(T, 4) scalar) : val(scalar) {}

    __device__ __forceinline__ TYPE_VEC(uchar, 4) operator()(const TYPE_VEC(T, 4) & src) const
    {
        Cmp<Op, T> channelCmp;

        const uchar rx = channelCmp(src.x, val.x);
        const uchar ry = channelCmp(src.y, val.y);
        const uchar rz = channelCmp(src.z, val.z);
        const uchar rw = channelCmp(src.w, val.w);

        return VecTraits<TYPE_VEC(uchar, 4)>::make(rx, ry, rz, rw);
    }
};
||||
|
||||
#undef TYPE_VEC |
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
// Traits for the single-channel CmpScalar functor: inherits from
// ArithmFuncTraits parameterised by the byte sizes of the source element (T)
// and of the destination element (uchar).
template <class Op, typename T>
struct TransformFunctorTraits< arithm::CmpScalar<Op, T, 1> >
    : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
{
};
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Compares every element of src with a scalar and writes the uchar results to dst.
//   Op     - comparison functor template (instantiated as Op<T>)
//   T      - channel type of src
//   cn     - number of channels (selects the CmpScalar specialization)
//   val    - four scalar components; each is converted to T before use
//   stream - CUDA stream handed to cudev::transform
template <template <typename> class Op, typename T, int cn>
void cmpScalar(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream)
{
    typedef typename TypeVec<T, cn>::vec_type src_t;
    typedef typename TypeVec<uchar, cn>::vec_type dst_t;

    // Convert all four scalar components to the channel type.
    T sval[4];
    for (int c = 0; c < 4; ++c)
        sval[c] = static_cast<T>(val[c]);

    const src_t scalar = VecTraits<src_t>::make(sval);

    CmpScalar<Op<T>, T, cn> op(scalar);

    // Reinterpret the byte matrices as matrices of the vector element types.
    cudev::transform(static_cast<PtrStepSz<src_t> >(src), static_cast<PtrStepSz<dst_t> >(dst), op, WithOutMask(), stream);
}
||||
|
||||
// Element-wise "src == val" for 1..4 channels.
// cn indexes the dispatch table directly and is not range-checked here;
// slot 0 is a null placeholder, so cn must lie in [1, 4].
template <typename T> void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
{
    typedef void (*impl_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);

    static const impl_t byChannels[] =
    {
        0,
        cmpScalar<equal_to, T, 1>,
        cmpScalar<equal_to, T, 2>,
        cmpScalar<equal_to, T, 3>,
        cmpScalar<equal_to, T, 4>
    };

    const impl_t impl = byChannels[cn];
    impl(src, val, dst, stream);
}
||||
// Element-wise "src != val" for 1..4 channels.
// cn indexes the dispatch table directly and is not range-checked here;
// slot 0 is a null placeholder, so cn must lie in [1, 4].
template <typename T> void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
{
    typedef void (*impl_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);

    static const impl_t byChannels[] =
    {
        0,
        cmpScalar<not_equal_to, T, 1>,
        cmpScalar<not_equal_to, T, 2>,
        cmpScalar<not_equal_to, T, 3>,
        cmpScalar<not_equal_to, T, 4>
    };

    const impl_t impl = byChannels[cn];
    impl(src, val, dst, stream);
}
||||
// Element-wise "src < val" for 1..4 channels.
// cn indexes the dispatch table directly and is not range-checked here;
// slot 0 is a null placeholder, so cn must lie in [1, 4].
template <typename T> void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
{
    typedef void (*impl_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);

    static const impl_t byChannels[] =
    {
        0,
        cmpScalar<less, T, 1>,
        cmpScalar<less, T, 2>,
        cmpScalar<less, T, 3>,
        cmpScalar<less, T, 4>
    };

    const impl_t impl = byChannels[cn];
    impl(src, val, dst, stream);
}
||||
// Element-wise "src <= val" for 1..4 channels.
// cn indexes the dispatch table directly and is not range-checked here;
// slot 0 is a null placeholder, so cn must lie in [1, 4].
template <typename T> void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
{
    typedef void (*impl_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);

    static const impl_t byChannels[] =
    {
        0,
        cmpScalar<less_equal, T, 1>,
        cmpScalar<less_equal, T, 2>,
        cmpScalar<less_equal, T, 3>,
        cmpScalar<less_equal, T, 4>
    };

    const impl_t impl = byChannels[cn];
    impl(src, val, dst, stream);
}
||||
// Element-wise "src > val" for 1..4 channels.
// cn indexes the dispatch table directly and is not range-checked here;
// slot 0 is a null placeholder, so cn must lie in [1, 4].
template <typename T> void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
{
    typedef void (*impl_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);

    static const impl_t byChannels[] =
    {
        0,
        cmpScalar<greater, T, 1>,
        cmpScalar<greater, T, 2>,
        cmpScalar<greater, T, 3>,
        cmpScalar<greater, T, 4>
    };

    const impl_t impl = byChannels[cn];
    impl(src, val, dst, stream);
}
||||
// Element-wise "src >= val" for 1..4 channels.
// cn indexes the dispatch table directly and is not range-checked here;
// slot 0 is a null placeholder, so cn must lie in [1, 4].
template <typename T> void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
{
    typedef void (*impl_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);

    static const impl_t byChannels[] =
    {
        0,
        cmpScalar<greater_equal, T, 1>,
        cmpScalar<greater_equal, T, 2>,
        cmpScalar<greater_equal, T, 3>,
        cmpScalar<greater_equal, T, 4>
    };

    const impl_t impl = byChannels[cn];
    impl(src, val, dst, stream);
}
||||
|
||||
// Explicit instantiations: every scalar comparison entry point is emitted
// for the seven element types uchar, schar, ushort, short, int, float and double.
#define ARITHM_INSTANTIATE_CMP_SCALAR(name) \
    template void name<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); \
    template void name<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); \
    template void name<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); \
    template void name<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); \
    template void name<int   >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); \
    template void name<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream); \
    template void name<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);

ARITHM_INSTANTIATE_CMP_SCALAR(cmpScalarEq)
ARITHM_INSTANTIATE_CMP_SCALAR(cmpScalarNe)
ARITHM_INSTANTIATE_CMP_SCALAR(cmpScalarLt)
ARITHM_INSTANTIATE_CMP_SCALAR(cmpScalarLe)
ARITHM_INSTANTIATE_CMP_SCALAR(cmpScalarGt)
ARITHM_INSTANTIATE_CMP_SCALAR(cmpScalarGe)

#undef ARITHM_INSTANTIATE_CMP_SCALAR
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,175 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/vec_traits.hpp" |
||||
#include "opencv2/core/cuda/vec_math.hpp" |
||||
#include "opencv2/core/cuda/reduce.hpp" |
||||
#include "opencv2/core/cuda/emulation.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace countNonZero |
||||
{ |
||||
// Number of thread blocks that have already stored their partial count.
// Only the code path compiled when __CUDA_ARCH__ < 200 uses it; the last
// block to finish sets it back to 0.
__device__ unsigned int blocks_finished = 0;

// Counts the non-zero elements of src.
//   BLOCK_SIZE - size of the shared scratch array passed to cudev::reduce
//   src        - input matrix
//   count      - output buffer: count[0] receives the total; on the
//                __CUDA_ARCH__ < 200 path count[bid] also holds per-block partial sums
//   twidth     - maximum number of columns visited by one thread
//   theight    - maximum number of rows visited by one thread
template <int BLOCK_SIZE, typename T>
__global__ void kernel(const PtrStepSz<T> src, unsigned int* count, const int twidth, const int theight)
{
    __shared__ unsigned int scount[BLOCK_SIZE];

    const int tid = threadIdx.y * blockDim.x + threadIdx.x;

    // First element handled by this thread; later ones are blockDim apart.
    const int firstCol = blockIdx.x * blockDim.x * twidth + threadIdx.x;
    const int firstRow = blockIdx.y * blockDim.y * theight + threadIdx.y;

    unsigned int mycount = 0;

    for (int stepY = 0, row = firstRow; stepY < theight && row < src.rows; ++stepY, row += blockDim.y)
    {
        const T* line = src.ptr(row);

        for (int stepX = 0, col = firstCol; stepX < twidth && col < src.cols; ++stepX, col += blockDim.x)
        {
            if (line[col] != 0)
                ++mycount;
        }
    }

    // Combine the per-thread counts of this block; thread 0 publishes the result below.
    cudev::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());

#if __CUDA_ARCH__ >= 200
    if (tid == 0)
        ::atomicAdd(count, mycount);
#else
    __shared__ bool is_last;

    const unsigned int totalBlocks = gridDim.x * gridDim.y;
    const int bid = blockIdx.y * gridDim.x + blockIdx.x;

    if (tid == 0)
    {
        count[bid] = mycount;

        // Make the partial sum visible before announcing that this block is done.
        __threadfence();

        const unsigned int ticket = ::atomicInc(&blocks_finished, totalBlocks);
        is_last = (ticket == totalBlocks - 1);
    }

    __syncthreads();

    // The block that drew the last ticket sums all per-block partial results.
    if (is_last)
    {
        if (tid < totalBlocks)
            mycount = count[tid];
        else
            mycount = 0;

        cudev::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());

        if (tid == 0)
        {
            count[0] = mycount;

            // Re-arm the counter for the next launch.
            blocks_finished = 0;
        }
    }
#endif
}
||||
|
||||
// Thread block dimensions used by the countNonZero kernel.
const int threads_x = 32;
const int threads_y = 8;

// Computes the launch configuration for a cols x rows input.
// block is always threads_x x threads_y; each grid dimension is
// divUp(extent, block.x * block.y), capped at block.x (for x) or block.y (for y).
void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
{
    block = dim3(threads_x, threads_y);

    const unsigned int threadsPerBlock = block.x * block.y;

    grid = dim3(divUp(cols, threadsPerBlock),
                divUp(rows, threadsPerBlock));

    if (grid.x > block.x)
        grid.x = block.x;

    if (grid.y > block.y)
        grid.y = block.y;
}
||||
|
||||
// Reports the size of the scratch buffer for a cols x rows input:
// a single row (bufrows = 1) whose width bufcols is sizeof(int) times the
// number of blocks in the grid chosen by getLaunchCfg.
void getBufSize(int cols, int rows, int& bufcols, int& bufrows)
{
    dim3 block, grid;
    getLaunchCfg(cols, rows, block, grid);

    bufrows = 1;
    bufcols = static_cast<int>(grid.x * grid.y * sizeof(int));
}
||||
|
||||
// Host entry point: counts the non-zero elements of src (viewed as a matrix of T).
//   src - input matrix passed as a byte matrix
//   buf - device scratch buffer; its first row receives the counters
// Returns the value of the first counter after the kernel has finished.
template <typename T>
int run(const PtrStepSzb src, PtrStep<unsigned int> buf)
{
    dim3 block, grid;
    getLaunchCfg(src.cols, src.rows, block, grid);

    // Number of columns / rows each thread has to walk to cover the input.
    const int twidth = divUp(divUp(src.cols, grid.x), block.x);
    const int theight = divUp(divUp(src.rows, grid.y), block.y);

    unsigned int* const deviceCount = buf.ptr(0);

    // Only the first counter is cleared before the launch.
    cudaSafeCall( cudaMemset(deviceCount, 0, sizeof(unsigned int)) );

    kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, deviceCount, twidth, theight);
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );

    unsigned int hostCount;
    cudaSafeCall( cudaMemcpy(&hostCount, deviceCount, sizeof(unsigned int), cudaMemcpyDeviceToHost) );

    return static_cast<int>(hostCount);
}
||||
|
||||
// Explicit instantiations of run, one per supported element type.
#define COUNT_NON_ZERO_INSTANTIATE_RUN(type) \
    template int run<type>(const PtrStepSzb src, PtrStep<unsigned int> buf);

COUNT_NON_ZERO_INSTANTIATE_RUN(uchar)
COUNT_NON_ZERO_INSTANTIATE_RUN(schar)
COUNT_NON_ZERO_INSTANTIATE_RUN(ushort)
COUNT_NON_ZERO_INSTANTIATE_RUN(short)
COUNT_NON_ZERO_INSTANTIATE_RUN(int)
COUNT_NON_ZERO_INSTANTIATE_RUN(float)
COUNT_NON_ZERO_INSTANTIATE_RUN(double)

#undef COUNT_NON_ZERO_INSTANTIATE_RUN
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,144 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Functor computing val / a for a fixed numerator val.
//   T - source element type
//   S - type in which the numerator is stored
//   D - destination element type (the quotient is passed through saturate_cast<D>)
// A zero divisor produces 0 instead of performing the division.
template <typename T, typename S, typename D> struct DivInv : unary_function<T, D>
{
    // Numerator shared by all elements.
    S val;

    explicit DivInv(S numerator) : val(numerator) {}

    __device__ __forceinline__ D operator ()(T a) const
    {
        if (a != 0)
            return saturate_cast<D>(val / a);

        return 0;
    }
};
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
// Traits for the DivInv functor: inherits from ArithmFuncTraits parameterised
// by the byte sizes of the source element (T) and the destination element (D).
template <typename T, typename S, typename D>
struct TransformFunctorTraits< arithm::DivInv<T, S, D> >
    : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Computes dst = val / src1 element-wise via the DivInv functor.
//   src1   - input matrix, viewed as elements of type T
//   val    - numerator; converted to S before use
//   dst    - output matrix, viewed as elements of type D
//   stream - CUDA stream handed to cudev::transform
template <typename T, typename S, typename D>
void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
{
    const S numerator = static_cast<S>(val);

    DivInv<T, S, D> op(numerator);

    cudev::transform(static_cast<PtrStepSz<T> >(src1), static_cast<PtrStepSz<D> >(dst), op, WithOutMask(), stream);
}
||||
|
||||
// Explicit instantiations of divInv<T, S, D>, grouped by source type T.
// Combinations that are commented out are intentionally not instantiated.
#define ARITHM_INSTANTIATE_DIV_INV(T, S, D) \
    template void divInv<T, S, D>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);

// source: uchar
ARITHM_INSTANTIATE_DIV_INV(uchar, float, uchar)
ARITHM_INSTANTIATE_DIV_INV(uchar, float, schar)
ARITHM_INSTANTIATE_DIV_INV(uchar, float, ushort)
ARITHM_INSTANTIATE_DIV_INV(uchar, float, short)
ARITHM_INSTANTIATE_DIV_INV(uchar, float, int)
ARITHM_INSTANTIATE_DIV_INV(uchar, float, float)
ARITHM_INSTANTIATE_DIV_INV(uchar, double, double)

// source: schar
ARITHM_INSTANTIATE_DIV_INV(schar, float, uchar)
ARITHM_INSTANTIATE_DIV_INV(schar, float, schar)
ARITHM_INSTANTIATE_DIV_INV(schar, float, ushort)
ARITHM_INSTANTIATE_DIV_INV(schar, float, short)
ARITHM_INSTANTIATE_DIV_INV(schar, float, int)
ARITHM_INSTANTIATE_DIV_INV(schar, float, float)
ARITHM_INSTANTIATE_DIV_INV(schar, double, double)

// source: ushort
//ARITHM_INSTANTIATE_DIV_INV(ushort, float, uchar)
//ARITHM_INSTANTIATE_DIV_INV(ushort, float, schar)
ARITHM_INSTANTIATE_DIV_INV(ushort, float, ushort)
ARITHM_INSTANTIATE_DIV_INV(ushort, float, short)
ARITHM_INSTANTIATE_DIV_INV(ushort, float, int)
ARITHM_INSTANTIATE_DIV_INV(ushort, float, float)
ARITHM_INSTANTIATE_DIV_INV(ushort, double, double)

// source: short
//ARITHM_INSTANTIATE_DIV_INV(short, float, uchar)
//ARITHM_INSTANTIATE_DIV_INV(short, float, schar)
ARITHM_INSTANTIATE_DIV_INV(short, float, ushort)
ARITHM_INSTANTIATE_DIV_INV(short, float, short)
ARITHM_INSTANTIATE_DIV_INV(short, float, int)
ARITHM_INSTANTIATE_DIV_INV(short, float, float)
ARITHM_INSTANTIATE_DIV_INV(short, double, double)

// source: int
//ARITHM_INSTANTIATE_DIV_INV(int, float, uchar)
//ARITHM_INSTANTIATE_DIV_INV(int, float, schar)
//ARITHM_INSTANTIATE_DIV_INV(int, float, ushort)
//ARITHM_INSTANTIATE_DIV_INV(int, float, short)
ARITHM_INSTANTIATE_DIV_INV(int, float, int)
ARITHM_INSTANTIATE_DIV_INV(int, float, float)
ARITHM_INSTANTIATE_DIV_INV(int, double, double)

// source: float
//ARITHM_INSTANTIATE_DIV_INV(float, float, uchar)
//ARITHM_INSTANTIATE_DIV_INV(float, float, schar)
//ARITHM_INSTANTIATE_DIV_INV(float, float, ushort)
//ARITHM_INSTANTIATE_DIV_INV(float, float, short)
//ARITHM_INSTANTIATE_DIV_INV(float, float, int)
ARITHM_INSTANTIATE_DIV_INV(float, float, float)
ARITHM_INSTANTIATE_DIV_INV(float, double, double)

// source: double
//ARITHM_INSTANTIATE_DIV_INV(double, double, uchar)
//ARITHM_INSTANTIATE_DIV_INV(double, double, schar)
//ARITHM_INSTANTIATE_DIV_INV(double, double, ushort)
//ARITHM_INSTANTIATE_DIV_INV(double, double, short)
//ARITHM_INSTANTIATE_DIV_INV(double, double, int)
//ARITHM_INSTANTIATE_DIV_INV(double, double, float)
ARITHM_INSTANTIATE_DIV_INV(double, double, double)

#undef ARITHM_INSTANTIATE_DIV_INV
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,230 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Divides four 8-bit channels packed into one 32-bit word by a single float.
// Each channel is scaled by the reciprocal of the divisor and saturated to
// uchar before being packed back into its original byte position.
// A zero divisor yields an all-zero result word.
struct Div_8uc4_32f : binary_function<uint, float, uint>
{
    __device__ __forceinline__ uint operator ()(uint a, float b) const
    {
        if (b == 0)
            return 0;

        // One reciprocal, four multiplications.
        const float inv = 1.0f / b;

        const uint c0 = saturate_cast<uchar>(((a      ) & 0xffu) * inv);
        const uint c1 = saturate_cast<uchar>(((a >>  8) & 0xffu) * inv);
        const uint c2 = saturate_cast<uchar>(((a >> 16) & 0xffu) * inv);
        const uint c3 = saturate_cast<uchar>(((a >> 24) & 0xffu) * inv);

        return c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
    }
};
||||
|
||||
// Divides every component of a short4 by one float divisor, saturating each
// quotient to short. A zero divisor produces a zero vector.
struct Div_16sc4_32f : binary_function<short4, float, short4>
{
    __device__ __forceinline__ short4 operator ()(short4 a, float b) const
    {
        if (b == 0)
            return make_short4(0, 0, 0, 0);

        return make_short4(saturate_cast<short>(a.x / b),
                           saturate_cast<short>(a.y / b),
                           saturate_cast<short>(a.z / b),
                           saturate_cast<short>(a.w / b));
    }
};
||||
|
||||
// Element-wise division functor: computes a / b in the arithmetic of T and
// saturates the quotient to D. Division by zero is defined to give 0.
template <typename T, typename D> struct Div : binary_function<T, T, D>
{
    __device__ __forceinline__ D operator ()(T a, T b) const
    {
        if (b != 0)
            return saturate_cast<D>(a / b);

        return 0;
    }

    __device__ __forceinline__ Div() {}
    __device__ __forceinline__ Div(const Div&) {}
};

// float destination: the numerator is converted to float before dividing and
// no saturation is applied. Division by zero gives 0.
template <typename T> struct Div<T, float> : binary_function<T, T, float>
{
    __device__ __forceinline__ float operator ()(T a, T b) const
    {
        if (b != 0)
            return static_cast<float>(a) / b;

        return 0;
    }

    __device__ __forceinline__ Div() {}
    __device__ __forceinline__ Div(const Div&) {}
};

// double destination: the numerator is converted to double before dividing
// and no saturation is applied. Division by zero gives 0.
template <typename T> struct Div<T, double> : binary_function<T, T, double>
{
    __device__ __forceinline__ double operator ()(T a, T b) const
    {
        if (b != 0)
            return static_cast<double>(a) / b;

        return 0;
    }

    __device__ __forceinline__ Div() {}
    __device__ __forceinline__ Div(const Div&) {}
};
||||
|
||||
// Scaled element-wise division: evaluates (scale * a) / b and saturates the
// result to D. Division by zero is defined to give 0.
//   T - source element type
//   S - type in which the scale factor is stored
//   D - destination element type
template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
{
    S scale;

    explicit DivScale(S scale_) : scale(scale_) {}

    __device__ __forceinline__ D operator ()(T a, T b) const
    {
        if (b != 0)
            return saturate_cast<D>(scale * a / b);

        return 0;
    }
};
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <> struct TransformFunctorTraits<arithm::Div_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::Div<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Divides a packed 8UC4 image (one uint per pixel) by a per-pixel float
// divisor image, writing packed 8UC4 output. The work is submitted on `stream`.
void divMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
    Div_8uc4_32f op;
    cudev::transform(src1, src2, dst, op, WithOutMask(), stream);
}
||||
|
||||
// Divides a short4 image by a per-pixel float divisor image, writing short4
// output. The work is submitted on `stream`.
void divMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
{
    Div_16sc4_32f op;
    cudev::transform(src1, src2, dst, op, WithOutMask(), stream);
}
||||
|
||||
template <typename T, typename S, typename D> |
||||
void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream) |
||||
{ |
||||
if (scale == 1) |
||||
{ |
||||
Div<T, D> op; |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream); |
||||
} |
||||
else |
||||
{ |
||||
DivScale<T, S, D> op(static_cast<S>(scale)); |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream); |
||||
} |
||||
} |
||||
|
||||
template void divMat<uchar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<uchar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<uchar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<uchar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<uchar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<uchar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<uchar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
|
||||
template void divMat<schar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<schar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<schar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<schar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<schar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<schar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<schar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
|
||||
//template void divMat<ushort, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<ushort, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<ushort, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<ushort, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<ushort, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<ushort, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<ushort, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
|
||||
//template void divMat<short, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<short, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<short, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<short, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<short, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<short, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<short, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
|
||||
//template void divMat<int, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<int, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<int, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<int, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<int, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<int, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<int, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
|
||||
//template void divMat<float, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<float, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<float, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<float, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<float, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<float, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<float, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
|
||||
//template void divMat<double, double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<double, double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<double, double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<double, double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<double, double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
//template void divMat<double, double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
template void divMat<double, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,144 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Divides every element by one fixed scalar: returns a / val saturated to D.
//   T - source element type
//   S - type in which the divisor is stored
//   D - destination element type
// No zero check is performed on the divisor here.
template <typename T, typename S, typename D> struct DivScalar : unary_function<T, D>
{
    S val;

    explicit DivScalar(S val_) : val(val_) {}

    __device__ __forceinline__ D operator ()(T a) const
    {
        return saturate_cast<D>(a / val);
    }
};
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T, typename S, typename D> |
||||
void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
DivScalar<T, S, D> op(static_cast<S>(val)); |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream); |
||||
} |
||||
|
||||
template void divScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
template void divScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
//template void divScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
//template void divScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
//template void divScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
//template void divScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
|
||||
//template void divScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
//template void divScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
template void divScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,302 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
#include "opencv2/core/cuda/limits.hpp" |
||||
#include "opencv2/core/cuda/type_traits.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
////////////////////////////////////////////////////////////////////////// |
||||
// absMat |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T> struct TransformFunctorTraits< abs_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T> |
||||
void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, abs_func<T>(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template void absMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void absMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////////////////// |
||||
// sqrMat |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Squaring functor: multiplies the element by itself and saturates the
// product back to T.
template <typename T> struct Sqr : unary_function<T, T>
{
    __device__ __forceinline__ Sqr() {}
    __device__ __forceinline__ Sqr(const Sqr&) {}

    __device__ __forceinline__ T operator ()(T x) const
    {
        return saturate_cast<T>(x * x);
    }
};
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T> struct TransformFunctorTraits< arithm::Sqr<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T> |
||||
void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Sqr<T>(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template void sqrMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void sqrMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void sqrMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void sqrMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void sqrMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void sqrMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void sqrMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////////////////// |
||||
// sqrtMat |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T> struct TransformFunctorTraits< sqrt_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T> |
||||
void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, sqrt_func<T>(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template void sqrtMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void sqrtMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void sqrtMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void sqrtMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void sqrtMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void sqrtMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void sqrtMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////////////////// |
||||
// logMat |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T> struct TransformFunctorTraits< log_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T> |
||||
void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, log_func<T>(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template void logMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void logMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void logMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void logMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void logMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void logMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void logMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////////////////// |
||||
// expMat |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Exponential functor: evaluates exp_func<T> on the element and saturates
// the result back to T.
template <typename T> struct Exp : unary_function<T, T>
{
    __device__ __forceinline__ Exp() {}
    __device__ __forceinline__ Exp(const Exp&) {}

    __device__ __forceinline__ T operator ()(T x) const
    {
        const exp_func<T> expOp;
        return saturate_cast<T>(expOp(x));
    }
};
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T> struct TransformFunctorTraits< arithm::Exp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T> |
||||
void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Exp<T>(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template void expMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void expMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void expMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void expMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void expMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void expMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
template void expMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); |
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////////////////// |
||||
// pow |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Power functor, primary template. The Signed parameter defaults to
// numeric_limits<T>::is_signed; signed types are handled by the
// PowOp<T, true> specialization, so this version serves unsigned T.
// The element is converted to float, raised to `power` with the fast
// __powf intrinsic, and the result is saturated back to T.
template<typename T, bool Signed = numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T>
{
    float power;

    PowOp(double power_) : power(static_cast<float>(power_)) {}

    __device__ __forceinline__ T operator()(T e) const
    {
        const float base = static_cast<float>(e);
        return saturate_cast<T>(__powf(base, power));
    }
};
||||
// Power functor for signed element types.
//
// The magnitude is computed from |e|. __powf is a fast approximation
// evaluated through a logarithm of the base, so handing it a negative base
// yields NaN instead of a usable magnitude, which would make the sign
// correction below meaningless. The sign is restored afterwards: the result
// is negated when e is negative and the exponent, truncated to int, is odd.
// The sign is applied in float, before saturation, so the most negative
// value of T stays reachable.
template<typename T> struct PowOp<T, true> : unary_function<T, T>
{
    float power;

    PowOp(double power_) : power(static_cast<float>(power_)) {}

    __device__ __forceinline__ T operator()(T e) const
    {
        float res = __powf(::fabsf(static_cast<float>(e)), power);

        // Odd (truncated) exponent with a negative base gives a negative result.
        if ((e < 0) && (1 & static_cast<int>(power)))
            res = -res;

        return saturate_cast<T>(res);
    }
};
||||
// Power functor for float elements: raises the absolute value of the
// element to `power` using the fast __powf intrinsic. The sign of the input
// is discarded.
template<> struct PowOp<float> : unary_function<float, float>
{
    const float power;

    PowOp(double power_) : power(static_cast<float>(power_)) {}

    __device__ __forceinline__ float operator()(float e) const
    {
        const float magnitude = ::fabs(e);
        return __powf(magnitude, power);
    }
};
||||
// Power functor for double elements: raises the absolute value of the
// element to `power` with ::pow, keeping the exponent in double precision.
// The sign of the input is discarded.
template<> struct PowOp<double> : unary_function<double, double>
{
    double power;

    PowOp(double power_) : power(power_) {}

    __device__ __forceinline__ double operator()(double e) const
    {
        const double magnitude = ::fabs(e);
        return ::pow(magnitude, power);
    }
};
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev
{
    // Transform traits for arithm::PowOp<T>: source and destination elements are both sizeof(T) bytes.
    template <typename T>
    struct TransformFunctorTraits< arithm::PowOp<T> >
        : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
    };
}}}
||||
|
||||
namespace arithm |
||||
{ |
||||
// Host entry point: applies PowOp<T>(power) to every element of src and writes the result to dst.
// src and dst arrive as byte views and are reinterpreted as T; the work is queued on `stream`.
template<typename T>
void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream)
{
    const PtrStepSz<T> typedSrc(src);
    const PtrStepSz<T> typedDst(dst);
    const PowOp<T> op(power);

    cudev::transform(typedSrc, typedDst, op, WithOutMask(), stream);
}

// Explicit instantiations for every supported element type.
template void pow<uchar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
template void pow<schar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
template void pow<short>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
template void pow<ushort>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
template void pow<int>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
template void pow<float>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
template void pow<double>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,246 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/vec_traits.hpp" |
||||
#include "opencv2/core/cuda/vec_math.hpp" |
||||
#include "opencv2/core/cuda/reduce.hpp" |
||||
#include "opencv2/core/cuda/emulation.hpp" |
||||
#include "opencv2/core/cuda/limits.hpp" |
||||
#include "opencv2/core/cuda/utility.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace minMax |
||||
{ |
||||
// Device-side counter of thread blocks that have published their partial result.
// GlobalReduce::run increments it with atomicAdd on the pre-sm_20 path and resets it to 0
// once the last block has finished the final reduction.
__device__ unsigned int blocks_finished = 0u;
||||
|
||||
// Maps an element type to the type used for accumulation in the kernels.
// To avoid shared memory bank conflicts every value is widened to a type
// that is at least 32 bits wide.
template <typename T> struct MinMaxTypeTraits;

template <> struct MinMaxTypeTraits<uchar>
{
    typedef int best_type;
};
template <> struct MinMaxTypeTraits<schar>
{
    typedef int best_type;
};
template <> struct MinMaxTypeTraits<ushort>
{
    typedef int best_type;
};
template <> struct MinMaxTypeTraits<short>
{
    typedef int best_type;
};
template <> struct MinMaxTypeTraits<int>
{
    typedef int best_type;
};
template <> struct MinMaxTypeTraits<float>
{
    typedef float best_type;
};
template <> struct MinMaxTypeTraits<double>
{
    typedef double best_type;
};
||||
|
||||
// Combines the per-block minimum/maximum (already reduced into thread 0 of each block)
// into a single grid-wide result stored at minval[0] / maxval[0].
template <int BLOCK_SIZE, typename R>
struct GlobalReduce
{
    // mymin/mymax : block-level result held by the calling thread (meaningful for tid == 0)
    // minval/maxval: global buffers; element 0 receives the final answer
    // tid / bid    : linear thread index within the block / linear block index within the grid
    // sminval/smaxval: shared scratch arrays, only used on the pre-sm_20 path
    static __device__ void run(R& mymin, R& mymax, R* minval, R* maxval, int tid, int bid, R* sminval, R* smaxval)
    {
#if __CUDA_ARCH__ >= 200
        // sm_20 and newer: one atomic min/max per block straight into the result slots.
        if (tid == 0)
        {
            Emulation::glob::atomicMin(minval, mymin);
            Emulation::glob::atomicMax(maxval, mymax);
        }
#else
        // Older devices: every block stores its partial result, and the block that
        // draws the last ticket reduces all partial results.
        __shared__ bool is_last;

        const unsigned int lastBlock = gridDim.x * gridDim.y - 1;

        if (tid == 0)
        {
            minval[bid] = mymin;
            maxval[bid] = mymax;

            // Make the stores above visible before the ticket is taken.
            __threadfence();

            const unsigned int ticket = ::atomicAdd(&blocks_finished, 1);
            is_last = (ticket == lastBlock);
        }

        __syncthreads();

        if (is_last)
        {
            // Threads beyond the number of blocks re-read the last partial result.
            const int slot = ::min(tid, lastBlock);

            mymin = minval[slot];
            mymax = maxval[slot];

            const minimum<R> minOp;
            const maximum<R> maxOp;
            cudev::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));

            if (tid == 0)
            {
                minval[0] = mymin;
                maxval[0] = mymax;

                // Re-arm the counter for the next launch.
                blocks_finished = 0;
            }
        }
#endif
    }
};
||||
|
||||
// Seed value for a running maximum: the lowest value of R.
// For floating point types this is -max(); for int, -max() would be INT_MIN + 1 and
// an input consisting only of INT_MIN would report a wrong maximum, so min() is used
// (matching what setDefaultKernel(int*, int*) writes into the result buffer).
template <typename R> struct LowestValue
{
    static __device__ __forceinline__ R get() { return -numeric_limits<R>::max(); }
};
template <> struct LowestValue<int>
{
    static __device__ __forceinline__ int get() { return numeric_limits<int>::min(); }
};

// Computes the minimum and maximum of the unmasked elements of src.
//
// Each thread walks up to theight x twidth elements, stepping by blockDim.y rows and
// blockDim.x columns, starting from its own (x0, y0). The per-thread results are
// reduced inside the block and then merged across blocks by GlobalReduce.
//
// src            : input image with elements of type T
// mask           : predicate mask(y, x); elements for which it is false are ignored
// minval, maxval : global result buffers (final values end up in element 0)
// twidth, theight: number of columns / rows each thread may visit
template <int BLOCK_SIZE, typename T, typename R, class Mask>
__global__ void kernel(const PtrStepSz<T> src, const Mask mask, R* minval, R* maxval, const int twidth, const int theight)
{
    __shared__ R sminval[BLOCK_SIZE];
    __shared__ R smaxval[BLOCK_SIZE];

    const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
    const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;

    const int tid = threadIdx.y * blockDim.x + threadIdx.x;
    const int bid = blockIdx.y * gridDim.x + blockIdx.x;

    // Neutral elements of min / max so that threads without data do not affect the result.
    R mymin = numeric_limits<R>::max();
    R mymax = LowestValue<R>::get();

    const minimum<R> minOp;
    const maximum<R> maxOp;

    for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
    {
        const T* ptr = src.ptr(y);

        for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
        {
            if (mask(y, x))
            {
                // Widen to the accumulation type before comparing.
                const R srcVal = ptr[x];

                mymin = minOp(mymin, srcVal);
                mymax = maxOp(mymax, srcVal);
            }
        }
    }

    // Block-level reduction.
    cudev::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));

    // Grid-level merge.
    GlobalReduce<BLOCK_SIZE, R>::run(mymin, mymax, minval, maxval, tid, bid, sminval, smaxval);
}
||||
|
||||
// Thread block shape used by the min/max kernel.
const int threads_x = 32;
const int threads_y = 8;

// Chooses the launch configuration for an image of cols x rows elements.
// The block is threads_x x threads_y; the grid has one block per (threads_x * threads_y)
// columns/rows, rounded up, and is capped at block.x by block.y blocks.
void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
{
    block = dim3(threads_x, threads_y);

    const int threadsPerBlock = block.x * block.y;

    dim3 cfg(divUp(cols, threadsPerBlock), divUp(rows, threadsPerBlock));
    cfg.x = ::min(cfg.x, block.x);
    cfg.y = ::min(cfg.y, block.y);

    grid = cfg;
}
||||
|
||||
// Reports the size of the scratch buffer needed for an image of cols x rows elements:
// two rows (one for minima, one for maxima), each with room for one double per thread block.
// bufcols is expressed in bytes.
void getBufSize(int cols, int rows, int& bufcols, int& bufrows)
{
    dim3 block, grid;
    getLaunchCfg(cols, rows, block, grid);

    const size_t rowBytes = grid.x * grid.y * sizeof(double);

    bufrows = 2;
    bufcols = static_cast<int>(rowBytes);
}
||||
|
||||
// Single-thread kernels that seed the first element of the result buffers with the
// neutral elements of min / max for the given accumulation type.

// int: minimum slot <- INT max, maximum slot <- INT min.
__global__ void setDefaultKernel(int* minval_buf, int* maxval_buf)
{
    minval_buf[0] = numeric_limits<int>::max();
    maxval_buf[0] = numeric_limits<int>::min();
}

// float: minimum slot <- +max, maximum slot <- -max.
__global__ void setDefaultKernel(float* minval_buf, float* maxval_buf)
{
    minval_buf[0] = numeric_limits<float>::max();
    maxval_buf[0] = -numeric_limits<float>::max();
}

// double: minimum slot <- +max, maximum slot <- -max.
__global__ void setDefaultKernel(double* minval_buf, double* maxval_buf)
{
    minval_buf[0] = numeric_limits<double>::max();
    maxval_buf[0] = -numeric_limits<double>::max();
}
||||
|
||||
// Launches the matching setDefaultKernel overload with a single thread to
// initialise the device-side result slots.
template <typename R>
void setDefault(R* minval_buf, R* maxval_buf)
{
    const dim3 single(1);
    setDefaultKernel<<<single, single>>>(minval_buf, maxval_buf);
}
||||
|
||||
// Host driver: finds the minimum and maximum of src (restricted to mask when mask.data is set).
//
// src            : input image, byte view reinterpreted as T
// mask           : optional mask; a null mask.data means "use every element"
// minval, maxval : host outputs, written as double
// buf            : device scratch buffer; row 0 holds minima, row 1 holds maxima
//
// The call is synchronous: it waits for the device and copies the results back.
template <typename T>
void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf)
{
    typedef typename MinMaxTypeTraits<T>::best_type work_type;

    dim3 block, grid;
    getLaunchCfg(src.cols, src.rows, block, grid);

    // Number of columns / rows each thread has to cover.
    const int tileCols = divUp(divUp(src.cols, grid.x), block.x);
    const int tileRows = divUp(divUp(src.rows, grid.y), block.y);

    work_type* const devMin = (work_type*) buf.ptr(0);
    work_type* const devMax = (work_type*) buf.ptr(1);

    setDefault(devMin, devMax);

    if (mask.data == 0)
        kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), devMin, devMax, tileCols, tileRows);
    else
        kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), devMin, devMax, tileCols, tileRows);

    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );

    // Fetch element 0 of each row: the grid-wide result.
    work_type hostMin, hostMax;
    cudaSafeCall( cudaMemcpy(&hostMin, devMin, sizeof(work_type), cudaMemcpyDeviceToHost) );
    cudaSafeCall( cudaMemcpy(&hostMax, devMax, sizeof(work_type), cudaMemcpyDeviceToHost) );

    *minval = hostMin;
    *maxval = hostMax;
}

// Explicit instantiations for every supported element type.
template void run<uchar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
template void run<schar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
template void run<ushort>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
template void run<int   >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
template void run<double>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,228 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
////////////////////////////////////////////////////////////////////////// |
||||
// min |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Binary functor that forwards two packed 32-bit words to vmin4.
// NOTE(review): vmin4 is presumably a per-byte minimum over four packed 8-bit lanes
// (see simd_functions.hpp) - confirm.
struct VMin4 : binary_function<uint, uint, uint>
{
    __device__ __forceinline__ uint operator ()(uint a, uint b) const
    {
        return vmin4(a, b);
    }

    // The functor is created and copied on the host (it is passed by value to
    // cudev::transform) as well as on the device, so both constructors must be
    // callable from either side.
    __host__ __device__ __forceinline__ VMin4() {}
    __host__ __device__ __forceinline__ VMin4(const VMin4&) {}
};
||||
|
||||
// Binary functor that forwards two packed 32-bit words to vmin2.
// NOTE(review): vmin2 is presumably a per-halfword minimum over two packed 16-bit lanes
// (see simd_functions.hpp) - confirm.
struct VMin2 : binary_function<uint, uint, uint>
{
    __device__ __forceinline__ uint operator ()(uint a, uint b) const
    {
        return vmin2(a, b);
    }

    // The functor is created and copied on the host (it is passed by value to
    // cudev::transform) as well as on the device, so both constructors must be
    // callable from either side.
    __host__ __device__ __forceinline__ VMin2() {}
    __host__ __device__ __forceinline__ VMin2(const VMin2&) {}
};
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev
{
    // Transform traits for the minimum functors. In every case the source and
    // destination element sizes passed to ArithmFuncTraits are equal.

    // Packed 4-lane minimum on 32-bit words.
    template <>
    struct TransformFunctorTraits< arithm::VMin4 >
        : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
    };

    // Packed 2-lane minimum on 32-bit words.
    template <>
    struct TransformFunctorTraits< arithm::VMin2 >
        : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
    };

    // Generic element-wise minimum of two images.
    template <typename T>
    struct TransformFunctorTraits< minimum<T> >
        : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
    };

    // Element-wise minimum of an image and a bound scalar.
    template <typename T>
    struct TransformFunctorTraits< binder2nd< minimum<T> > >
        : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
    };
}}}
||||
|
||||
namespace arithm |
||||
{ |
||||
// Element-wise minimum of two images viewed as packed 32-bit words, using the VMin4 functor.
// The work is queued on `stream`.
void minMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
    VMin4 op;
    cudev::transform(src1, src2, dst, op, WithOutMask(), stream);
}
||||
|
||||
// Element-wise minimum of two images viewed as packed 32-bit words, using the VMin2 functor.
// The work is queued on `stream`.
void minMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
    VMin2 op;
    cudev::transform(src1, src2, dst, op, WithOutMask(), stream);
}
||||
|
||||
// Element-wise minimum of two images with elements of type T: dst = min(src1, src2).
// The byte views are reinterpreted as T and the work is queued on `stream`.
template <typename T>
void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
{
    const PtrStepSz<T> lhs(src1);
    const PtrStepSz<T> rhs(src2);
    const PtrStepSz<T> out(dst);

    cudev::transform(lhs, rhs, out, minimum<T>(), WithOutMask(), stream);
}

// Explicit instantiations for every supported element type.
template void minMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void minMat<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void minMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void minMat<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void minMat<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void minMat<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void minMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
||||
|
||||
// Element-wise minimum of an image and a scalar: dst = min(src1, src2).
// The scalar is bound as the second argument of minimum<T>; the work is queued on `stream`.
// NOTE(review): src2 is a double while the functor works on T; how it is narrowed is
// decided inside bind2nd - confirm that out-of-range scalars behave as intended.
template <typename T>
void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
{
    const PtrStepSz<T> in(src1);
    const PtrStepSz<T> out(dst);

    cudev::transform(in, out, cv::gpu::cudev::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
}

// Explicit instantiations for every supported element type.
template void minScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void minScalar<schar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void minScalar<ushort>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void minScalar<short >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void minScalar<int   >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void minScalar<float >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void minScalar<double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
||||
} |
||||
|
||||
////////////////////////////////////////////////////////////////////////// |
||||
// max |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Binary functor that forwards two packed 32-bit words to vmax4.
// NOTE(review): vmax4 is presumably a per-byte maximum over four packed 8-bit lanes
// (see simd_functions.hpp) - confirm.
struct VMax4 : binary_function<uint, uint, uint>
{
    __device__ __forceinline__ uint operator ()(uint a, uint b) const
    {
        return vmax4(a, b);
    }

    // The functor is created and copied on the host (it is passed by value to
    // cudev::transform) as well as on the device, so both constructors must be
    // callable from either side.
    __host__ __device__ __forceinline__ VMax4() {}
    __host__ __device__ __forceinline__ VMax4(const VMax4&) {}
};
||||
|
||||
// Binary functor that forwards two packed 32-bit words to vmax2.
// NOTE(review): vmax2 is presumably a per-halfword maximum over two packed 16-bit lanes
// (see simd_functions.hpp) - confirm.
struct VMax2 : binary_function<uint, uint, uint>
{
    __device__ __forceinline__ uint operator ()(uint a, uint b) const
    {
        return vmax2(a, b);
    }

    // The functor is created and copied on the host (it is passed by value to
    // cudev::transform) as well as on the device, so both constructors must be
    // callable from either side.
    __host__ __device__ __forceinline__ VMax2() {}
    __host__ __device__ __forceinline__ VMax2(const VMax2&) {}
};
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev
{
    // Transform traits for the maximum functors. In every case the source and
    // destination element sizes passed to ArithmFuncTraits are equal.

    // Packed 4-lane maximum on 32-bit words.
    template <>
    struct TransformFunctorTraits< arithm::VMax4 >
        : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
    };

    // Packed 2-lane maximum on 32-bit words.
    template <>
    struct TransformFunctorTraits< arithm::VMax2 >
        : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
    {
    };

    // Generic element-wise maximum of two images.
    template <typename T>
    struct TransformFunctorTraits< maximum<T> >
        : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
    };

    // Element-wise maximum of an image and a bound scalar.
    template <typename T>
    struct TransformFunctorTraits< binder2nd< maximum<T> > >
        : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
    {
    };
}}}
||||
|
||||
namespace arithm |
||||
{ |
||||
// Element-wise maximum of two images viewed as packed 32-bit words, using the VMax4 functor.
// The work is queued on `stream`.
void maxMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
    VMax4 op;
    cudev::transform(src1, src2, dst, op, WithOutMask(), stream);
}
||||
|
||||
// Element-wise maximum of two images viewed as packed 32-bit words, using the VMax2 functor.
// The work is queued on `stream`.
void maxMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
    VMax2 op;
    cudev::transform(src1, src2, dst, op, WithOutMask(), stream);
}
||||
|
||||
// Element-wise maximum of two images with elements of type T: dst = max(src1, src2).
// The byte views are reinterpreted as T and the work is queued on `stream`.
template <typename T>
void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
{
    const PtrStepSz<T> lhs(src1);
    const PtrStepSz<T> rhs(src2);
    const PtrStepSz<T> out(dst);

    cudev::transform(lhs, rhs, out, maximum<T>(), WithOutMask(), stream);
}

// Explicit instantiations for every supported element type.
template void maxMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void maxMat<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void maxMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void maxMat<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void maxMat<int   >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void maxMat<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void maxMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
||||
|
||||
// Element-wise maximum of an image and a scalar: dst = max(src1, src2).
// The scalar is bound as the second argument of maximum<T>; the work is queued on `stream`.
// NOTE(review): src2 is a double while the functor works on T; how it is narrowed is
// decided inside bind2nd - confirm that out-of-range scalars behave as intended.
template <typename T>
void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
{
    const PtrStepSz<T> in(src1);
    const PtrStepSz<T> out(dst);

    cudev::transform(in, out, cv::gpu::cudev::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
}

// Explicit instantiations for every supported element type.
template void maxScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void maxScalar<schar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void maxScalar<ushort>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void maxScalar<short >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void maxScalar<int   >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void maxScalar<float >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
template void maxScalar<double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,235 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/vec_traits.hpp" |
||||
#include "opencv2/core/cuda/vec_math.hpp" |
||||
#include "opencv2/core/cuda/reduce.hpp" |
||||
#include "opencv2/core/cuda/emulation.hpp" |
||||
#include "opencv2/core/cuda/limits.hpp" |
||||
#include "opencv2/core/cuda/utility.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace minMaxLoc |
||||
{ |
||||
// Maps an element type to the type used for accumulation in the kernels.
// To avoid shared memory bank conflicts every value is widened to a type
// that is at least 32 bits wide.
template <typename T> struct MinMaxTypeTraits;

template <> struct MinMaxTypeTraits<unsigned char>
{
    typedef int best_type;
};
template <> struct MinMaxTypeTraits<signed char>
{
    typedef int best_type;
};
template <> struct MinMaxTypeTraits<unsigned short>
{
    typedef int best_type;
};
template <> struct MinMaxTypeTraits<short>
{
    typedef int best_type;
};
template <> struct MinMaxTypeTraits<int>
{
    typedef int best_type;
};
template <> struct MinMaxTypeTraits<float>
{
    typedef float best_type;
};
template <> struct MinMaxTypeTraits<double>
{
    typedef double best_type;
};
||||
|
||||
// Seed value for a running maximum: the lowest value of R.
// For floating point types this is -max(); for int, -max() would be INT_MIN + 1 and
// an input consisting only of INT_MIN would report a wrong maximum, so min() is used.
template <typename R> struct LowestValue
{
    static __device__ __forceinline__ R get() { return -numeric_limits<R>::max(); }
};
template <> struct LowestValue<int>
{
    static __device__ __forceinline__ int get() { return numeric_limits<int>::min(); }
};

// First pass of min/max with locations: every block writes its own minimum, maximum
// and their positions to slot `bid` of the output buffers.
//
// Each thread walks up to theight x twidth elements, stepping by blockDim.y rows and
// blockDim.x columns from its own (x0, y0). A position is encoded as y * src.cols + x.
//
// src              : input image with elements of type T
// mask             : predicate mask(y, x); elements for which it is false are ignored
// minval, maxval   : per-block value outputs
// minloc, maxloc   : per-block encoded position outputs
// twidth, theight  : number of columns / rows each thread may visit
template <int BLOCK_SIZE, typename T, class Mask>
__global__ void kernel_pass_1(const PtrStepSz<T> src, const Mask mask, T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, const int twidth, const int theight)
{
    typedef typename MinMaxTypeTraits<T>::best_type work_type;

    __shared__ work_type sminval[BLOCK_SIZE];
    __shared__ work_type smaxval[BLOCK_SIZE];
    __shared__ unsigned int sminloc[BLOCK_SIZE];
    __shared__ unsigned int smaxloc[BLOCK_SIZE];

    const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
    const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;

    const int tid = threadIdx.y * blockDim.x + threadIdx.x;
    const int bid = blockIdx.y * gridDim.x + blockIdx.x;

    // Neutral elements of min / max; the location defaults to element 0.
    work_type mymin = numeric_limits<work_type>::max();
    work_type mymax = LowestValue<work_type>::get();
    unsigned int myminloc = 0;
    unsigned int mymaxloc = 0;

    for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
    {
        const T* ptr = src.ptr(y);

        for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
        {
            if (mask(y, x))
            {
                // Widen to the accumulation type before comparing.
                const work_type srcVal = ptr[x];

                if (srcVal < mymin)
                {
                    mymin = srcVal;
                    myminloc = y * src.cols + x;
                }

                if (srcVal > mymax)
                {
                    mymax = srcVal;
                    mymaxloc = y * src.cols + x;
                }
            }
        }
    }

    // Block-level reduction of (value, location) pairs.
    reduceKeyVal<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax),
                             smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc),
                             tid,
                             thrust::make_tuple(less<work_type>(), greater<work_type>()));

    // Thread 0 publishes the block result.
    if (tid == 0)
    {
        minval[bid] = (T) mymin;
        maxval[bid] = (T) mymax;
        minloc[bid] = myminloc;
        maxloc[bid] = mymaxloc;
    }
}
||||
// Second pass: a single block reduces the `count` per-block results produced by the first
// pass and stores the final values and encoded positions in slot 0 of each buffer.
template <int BLOCK_SIZE, typename T>
__global__ void kernel_pass_2(T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, int count)
{
    typedef typename MinMaxTypeTraits<T>::best_type work_type;

    __shared__ work_type sminval[BLOCK_SIZE];
    __shared__ work_type smaxval[BLOCK_SIZE];
    __shared__ unsigned int sminloc[BLOCK_SIZE];
    __shared__ unsigned int smaxloc[BLOCK_SIZE];

    // Threads beyond `count` re-read the last valid slot.
    const unsigned int slot = ::min(threadIdx.x, count - 1);

    work_type bestMin = minval[slot];
    work_type bestMax = maxval[slot];
    unsigned int bestMinLoc = minloc[slot];
    unsigned int bestMaxLoc = maxloc[slot];

    reduceKeyVal<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(bestMin, bestMax),
                             smem_tuple(sminloc, smaxloc), thrust::tie(bestMinLoc, bestMaxLoc),
                             threadIdx.x,
                             thrust::make_tuple(less<work_type>(), greater<work_type>()));

    // Thread 0 publishes the final result.
    if (threadIdx.x == 0)
    {
        minval[0] = (T) bestMin;
        maxval[0] = (T) bestMax;
        minloc[0] = bestMinLoc;
        maxloc[0] = bestMaxLoc;
    }
}
||||
|
||||
// Thread block shape used by the first-pass kernel.
const int threads_x = 32;
const int threads_y = 8;

// Chooses the launch configuration for an image of cols x rows elements.
// The block is threads_x x threads_y; the grid has one block per (threads_x * threads_y)
// columns/rows, rounded up, and is capped at block.x by block.y blocks.
void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
{
    block = dim3(threads_x, threads_y);

    const int threadsPerBlock = block.x * block.y;

    dim3 cfg(divUp(cols, threadsPerBlock), divUp(rows, threadsPerBlock));
    cfg.x = ::min(cfg.x, block.x);
    cfg.y = ::min(cfg.y, block.y);

    grid = cfg;
}
||||
|
||||
// Reports the sizes of the two scratch buffers needed for an image of cols x rows elements.
// Both buffers have two rows (minimum / maximum) and one entry per thread block:
//   b1 - values,    elem_size bytes per entry
//   b2 - locations, sizeof(int) bytes per entry
// The column counts are expressed in bytes.
void getBufSize(int cols, int rows, size_t elem_size, int& b1cols, int& b1rows, int& b2cols, int& b2rows)
{
    dim3 block, grid;
    getLaunchCfg(cols, rows, block, grid);

    // Value buffer.
    b1rows = 2;
    b1cols = (int)(grid.x * grid.y * elem_size);

    // Location buffer.
    b2rows = 2;
    b2cols = (int)(grid.x * grid.y * sizeof(int));
}
||||
|
||||
// Host driver: finds the minimum and maximum of src and where they occur
// (restricted to mask when mask.data is set).
//
// src            : input image, byte view reinterpreted as T
// mask           : optional mask; a null mask.data means "use every element"
// minval, maxval : host outputs, written as double
// minloc, maxloc : host outputs; index 0 receives the column, index 1 the row
// valbuf, locbuf : device scratch buffers; row 0 is used for minima, row 1 for maxima
//
// The call is synchronous: it waits for the device and copies the results back.
template <typename T>
void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf)
{
    dim3 block, grid;
    getLaunchCfg(src.cols, src.rows, block, grid);

    // Number of columns / rows each thread has to cover.
    const int tileCols = divUp(divUp(src.cols, grid.x), block.x);
    const int tileRows = divUp(divUp(src.rows, grid.y), block.y);

    T* const devMinVal = (T*) valbuf.ptr(0);
    T* const devMaxVal = (T*) valbuf.ptr(1);
    unsigned int* const devMinLoc = locbuf.ptr(0);
    unsigned int* const devMaxLoc = locbuf.ptr(1);

    // Pass 1: one partial result per block.
    if (mask.data == 0)
        kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), devMinVal, devMaxVal, devMinLoc, devMaxLoc, tileCols, tileRows);
    else
        kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), devMinVal, devMaxVal, devMinLoc, devMaxLoc, tileCols, tileRows);

    cudaSafeCall( cudaGetLastError() );

    // Pass 2: a single block folds the partial results into slot 0.
    kernel_pass_2<threads_x * threads_y><<<1, threads_x * threads_y>>>(devMinVal, devMaxVal, devMinLoc, devMaxLoc, grid.x * grid.y);
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );

    T hostMinVal, hostMaxVal;
    cudaSafeCall( cudaMemcpy(&hostMinVal, devMinVal, sizeof(T), cudaMemcpyDeviceToHost) );
    cudaSafeCall( cudaMemcpy(&hostMaxVal, devMaxVal, sizeof(T), cudaMemcpyDeviceToHost) );
    *minval = hostMinVal;
    *maxval = hostMaxVal;

    unsigned int hostMinLoc, hostMaxLoc;
    cudaSafeCall( cudaMemcpy(&hostMinLoc, devMinLoc, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
    cudaSafeCall( cudaMemcpy(&hostMaxLoc, devMaxLoc, sizeof(unsigned int), cudaMemcpyDeviceToHost) );

    // Decode "y * cols + x" back into (x, y).
    minloc[1] = hostMinLoc / src.cols;
    minloc[0] = hostMinLoc - minloc[1] * src.cols;

    maxloc[1] = hostMaxLoc / src.cols;
    maxloc[0] = hostMaxLoc - maxloc[1] * src.cols;
}

// Explicit instantiations for every supported element type.
template void run<unsigned char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
template void run<signed char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
template void run<unsigned short>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
template void run<int   >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
template void run<double>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,211 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Binary functor: multiplies each of the four 8-bit lanes packed into a
// 32-bit word by one float factor and re-packs the results.
struct Mul_8uc4_32f : binary_function<uint, float, uint>
{
    // a: four unsigned 8-bit values packed into one uint (lane 0 in the low byte).
    // b: factor applied to every lane.
    // Returns the four products, each passed through saturate_cast<uchar>,
    // packed in the same lane order as the input.
    __device__ __forceinline__ uint operator ()(uint a, float b) const
    {
        uint res = 0;

        // Widen each byte to uint *before* shifting. A bare uchar is promoted
        // to signed int, so shifting a value >= 128 left by 24 would move a
        // set bit into the sign position of that int.
        res |= static_cast<uint>(saturate_cast<uchar>((0xffu & (a      )) * b));
        res |= static_cast<uint>(saturate_cast<uchar>((0xffu & (a >>  8)) * b)) <<  8;
        res |= static_cast<uint>(saturate_cast<uchar>((0xffu & (a >> 16)) * b)) << 16;
        res |= static_cast<uint>(saturate_cast<uchar>((0xffu & (a >> 24)) * b)) << 24;

        return res;
    }

    // User-provided empty default and copy constructors; the functor has no
    // data members, so there is nothing to initialise or copy.
    __device__ __forceinline__ Mul_8uc4_32f() {}
    __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
};
||||
|
||||
// Binary functor: scales the four components of a short4 by one float factor.
struct Mul_16sc4_32f : binary_function<short4, float, short4>
{
    __device__ __forceinline__ Mul_16sc4_32f() {}
    __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}

    // a: four-component input; b: factor applied to each component.
    // Each product goes through saturate_cast<short> before re-packing.
    __device__ __forceinline__ short4 operator ()(short4 a, float b) const
    {
        const short x = saturate_cast<short>(a.x * b);
        const short y = saturate_cast<short>(a.y * b);
        const short z = saturate_cast<short>(a.z * b);
        const short w = saturate_cast<short>(a.w * b);

        return make_short4(x, y, z, w);
    }
};
||||
|
||||
// Binary functor: element-wise product of two T values, converted to D
// through saturate_cast.
template <typename T, typename D> struct Mul : binary_function<T, T, D>
{
    __device__ __forceinline__ Mul() {}
    __device__ __forceinline__ Mul(const Mul&) {}

    // The product is formed in the type of the expression a * b and only then
    // narrowed to D.
    __device__ __forceinline__ D operator ()(T a, T b) const
    {
        const D product = saturate_cast<D>(a * b);
        return product;
    }
};
||||
|
||||
// Binary functor: scaled element-wise product. Computes scale * a * b in that
// order and converts the result to D through saturate_cast.
template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
{
    // Multiplier applied to every product; stored in the intermediate type S.
    S scale;

    // scale_: the factor to store. The constructor carries no __device__
    // qualifier, unlike operator().
    explicit MulScale(S scale_) : scale(scale_) {}

    __device__ __forceinline__ D operator ()(T a, T b) const
    {
        const D scaled = saturate_cast<D>(scale * a * b);
        return scaled;
    }
};
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <> struct TransformFunctorTraits<arithm::Mul_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::Mul<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Multiplies a matrix of packed 4x8-bit words (src1) by a float matrix (src2)
// element by element, writing packed results to dst. No mask is applied; the
// work is issued on the given stream.
void mulMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
    Mul_8uc4_32f packedMul;
    cudev::transform(src1, src2, dst, packedMul, WithOutMask(), stream);
}
||||
|
||||
// Multiplies a short4 matrix (src1) by a float matrix (src2) element by
// element, writing short4 results to dst. No mask is applied; the work is
// issued on the given stream.
void mulMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
{
    Mul_16sc4_32f vecMul;
    cudev::transform(src1, src2, dst, vecMul, WithOutMask(), stream);
}
||||
|
||||
template <typename T, typename S, typename D> |
||||
void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream) |
||||
{ |
||||
if (scale == 1) |
||||
{ |
||||
Mul<T, D> op; |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream); |
||||
} |
||||
else |
||||
{ |
||||
MulScale<T, S, D> op(static_cast<S>(scale)); |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream); |
||||
} |
||||
} |
||||
|
||||
// Explicit instantiations of mulMat<T, S, D>, grouped by source type T.
// Combinations shown as comments are not instantiated.

// T = uchar
template void mulMat<uchar, float, uchar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<uchar, float, schar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<uchar, float, ushort>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<uchar, float, short>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<uchar, float, int>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<uchar, float, float>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<uchar, double, double>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);

// T = schar
template void mulMat<schar, float, uchar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<schar, float, schar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<schar, float, ushort>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<schar, float, short>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<schar, float, int>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<schar, float, float>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<schar, double, double>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);

// T = ushort
//template void mulMat<ushort, float, uchar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<ushort, float, schar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<ushort, float, ushort>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<ushort, float, short>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<ushort, float, int>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<ushort, float, float>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<ushort, double, double>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);

// T = short
//template void mulMat<short, float, uchar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<short, float, schar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<short, float, ushort>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<short, float, short>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<short, float, int>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<short, float, float>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<short, double, double>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);

// T = int
//template void mulMat<int, float, uchar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<int, float, schar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<int, float, ushort>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<int, float, short>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<int, float, int>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<int, float, float>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<int, double, double>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);

// T = float
//template void mulMat<float, float, uchar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<float, float, schar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<float, float, ushort>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<float, float, short>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<float, float, int>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<float, float, float>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<float, double, double>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);

// T = double
//template void mulMat<double, double, uchar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<double, double, schar>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<double, double, ushort>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<double, double, short>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<double, double, int>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
//template void mulMat<double, double, float>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
template void mulMat<double, double, double>(PtrStepSzb, PtrStepSzb, PtrStepSzb, double, cudaStream_t);
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,144 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
// Unary functor: multiplies its argument by a fixed scalar and converts the
// product to D through saturate_cast.
template <typename T, typename S, typename D> struct MulScalar : unary_function<T, D>
{
    // The scalar factor, held in the intermediate type S.
    S val;

    // val_: the factor to store. The constructor carries no __device__
    // qualifier, unlike operator().
    explicit MulScalar(S val_) : val(val_) {}

    __device__ __forceinline__ D operator ()(T a) const
    {
        const D scaled = saturate_cast<D>(a * val);
        return scaled;
    }
};
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T, typename S, typename D> |
||||
void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) |
||||
{ |
||||
MulScalar<T, S, D> op(static_cast<S>(val)); |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream); |
||||
} |
||||
|
||||
// Explicit instantiations of mulScalar<T, S, D>, grouped by source type T.
// Combinations shown as comments are not instantiated.

// T = uchar
template void mulScalar<uchar, float, uchar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<uchar, float, schar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<uchar, float, ushort>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<uchar, float, short>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<uchar, float, int>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<uchar, float, float>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<uchar, double, double>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);

// T = schar
template void mulScalar<schar, float, uchar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<schar, float, schar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<schar, float, ushort>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<schar, float, short>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<schar, float, int>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<schar, float, float>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<schar, double, double>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);

// T = ushort
//template void mulScalar<ushort, float, uchar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<ushort, float, schar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<ushort, float, ushort>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<ushort, float, short>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<ushort, float, int>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<ushort, float, float>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<ushort, double, double>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);

// T = short
//template void mulScalar<short, float, uchar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<short, float, schar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<short, float, ushort>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<short, float, short>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<short, float, int>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<short, float, float>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<short, double, double>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);

// T = int
//template void mulScalar<int, float, uchar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<int, float, schar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<int, float, ushort>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<int, float, short>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<int, float, int>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<int, float, float>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<int, double, double>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);

// T = float
//template void mulScalar<float, float, uchar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<float, float, schar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<float, float, ushort>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<float, float, short>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<float, float, int>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<float, float, float>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<float, double, double>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);

// T = double
//template void mulScalar<double, double, uchar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<double, double, schar>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<double, double, ushort>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<double, double, short>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<double, double, int>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
//template void mulScalar<double, double, float>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
template void mulScalar<double, double, double>(PtrStepSzb, double, PtrStepSzb, cudaStream_t);
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,330 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/vec_traits.hpp" |
||||
#include "opencv2/core/cuda/vec_math.hpp" |
||||
#include "opencv2/core/cuda/reduce.hpp" |
||||
#include "opencv2/core/cuda/limits.hpp" |
||||
|
||||
#include "unroll_detail.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace reduce |
||||
{ |
||||
struct Sum |
||||
{ |
||||
template <typename T> |
||||
__device__ __forceinline__ T startValue() const |
||||
{ |
||||
return VecTraits<T>::all(0); |
||||
} |
||||
|
||||
template <typename T> |
||||
__device__ __forceinline__ T operator ()(T a, T b) const |
||||
{ |
||||
return a + b; |
||||
} |
||||
|
||||
template <typename T> |
||||
__device__ __forceinline__ T result(T r, double) const |
||||
{ |
||||
return r; |
||||
} |
||||
|
||||
__device__ __forceinline__ Sum() {} |
||||
__device__ __forceinline__ Sum(const Sum&) {} |
||||
}; |
||||
|
||||
struct Avg |
||||
{ |
||||
template <typename T> |
||||
__device__ __forceinline__ T startValue() const |
||||
{ |
||||
return VecTraits<T>::all(0); |
||||
} |
||||
|
||||
template <typename T> |
||||
__device__ __forceinline__ T operator ()(T a, T b) const |
||||
{ |
||||
return a + b; |
||||
} |
||||
|
||||
template <typename T> |
||||
__device__ __forceinline__ typename TypeVec<double, VecTraits<T>::cn>::vec_type result(T r, double sz) const |
||||
{ |
||||
return r / sz; |
||||
} |
||||
|
||||
__device__ __forceinline__ Avg() {} |
||||
__device__ __forceinline__ Avg(const Avg&) {} |
||||
}; |
||||
|
||||
struct Min |
||||
{ |
||||
template <typename T> |
||||
__device__ __forceinline__ T startValue() const |
||||
{ |
||||
return VecTraits<T>::all(numeric_limits<typename VecTraits<T>::elem_type>::max()); |
||||
} |
||||
|
||||
template <typename T> |
||||
__device__ __forceinline__ T operator ()(T a, T b) const |
||||
{ |
||||
minimum<T> minOp; |
||||
return minOp(a, b); |
||||
} |
||||
|
||||
template <typename T> |
||||
__device__ __forceinline__ T result(T r, double) const |
||||
{ |
||||
return r; |
||||
} |
||||
|
||||
__device__ __forceinline__ Min() {} |
||||
__device__ __forceinline__ Min(const Min&) {} |
||||
}; |
||||
|
||||
struct Max |
||||
{ |
||||
template <typename T> |
||||
__device__ __forceinline__ T startValue() const |
||||
{ |
||||
return VecTraits<T>::all(-numeric_limits<typename VecTraits<T>::elem_type>::max()); |
||||
} |
||||
|
||||
template <typename T> |
||||
__device__ __forceinline__ T operator ()(T a, T b) const |
||||
{ |
||||
maximum<T> maxOp; |
||||
return maxOp(a, b); |
||||
} |
||||
|
||||
template <typename T> |
||||
__device__ __forceinline__ T result(T r, double) const |
||||
{ |
||||
return r; |
||||
} |
||||
|
||||
__device__ __forceinline__ Max() {} |
||||
__device__ __forceinline__ Max(const Max&) {} |
||||
}; |
||||
|
||||
/////////////////////////////////////////////////////////// |
||||
|
||||
template <typename T, typename S, typename D, class Op> |
||||
__global__ void rowsKernel(const PtrStepSz<T> src, D* dst, const Op op) |
||||
{ |
||||
__shared__ S smem[16 * 16]; |
||||
|
||||
const int x = blockIdx.x * 16 + threadIdx.x; |
||||
|
||||
S myVal = op.template startValue<S>(); |
||||
|
||||
if (x < src.cols) |
||||
{ |
||||
for (int y = threadIdx.y; y < src.rows; y += 16) |
||||
{ |
||||
S srcVal = src(y, x); |
||||
myVal = op(myVal, srcVal); |
||||
} |
||||
} |
||||
|
||||
smem[threadIdx.x * 16 + threadIdx.y] = myVal; |
||||
|
||||
__syncthreads(); |
||||
|
||||
volatile S* srow = smem + threadIdx.y * 16; |
||||
|
||||
myVal = srow[threadIdx.x]; |
||||
cudev::reduce<16>(srow, myVal, threadIdx.x, op); |
||||
|
||||
if (threadIdx.x == 0) |
||||
srow[0] = myVal; |
||||
|
||||
__syncthreads(); |
||||
|
||||
if (threadIdx.y == 0 && x < src.cols) |
||||
dst[x] = (D) op.result(smem[threadIdx.x * 16], src.rows); |
||||
} |
||||
|
||||
template <typename T, typename S, typename D, class Op> |
||||
void rowsCaller(PtrStepSz<T> src, D* dst, cudaStream_t stream) |
||||
{ |
||||
const dim3 block(16, 16); |
||||
const dim3 grid(divUp(src.cols, block.x)); |
||||
|
||||
Op op; |
||||
rowsKernel<T, S, D, Op><<<grid, block, 0, stream>>>(src, dst, op); |
||||
cudaSafeCall( cudaGetLastError() ); |
||||
|
||||
if (stream == 0) |
||||
cudaSafeCall( cudaDeviceSynchronize() ); |
||||
} |
||||
|
||||
template <typename T, typename S, typename D> |
||||
void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream) |
||||
{ |
||||
typedef void (*func_t)(PtrStepSz<T> src, D* dst, cudaStream_t stream); |
||||
static const func_t funcs[] = |
||||
{ |
||||
rowsCaller<T, S, D, Sum>, |
||||
rowsCaller<T, S, D, Avg>, |
||||
rowsCaller<T, S, D, Max>, |
||||
rowsCaller<T, S, D, Min> |
||||
}; |
||||
|
||||
funcs[op]((PtrStepSz<T>) src, (D*) dst, stream); |
||||
} |
||||
|
||||
// Explicit instantiations of rows<T, S, D>
// (source type, accumulator type, destination type), grouped by source type.

// T = unsigned char
template void rows<unsigned char, int, unsigned char>(PtrStepSzb, void*, int, cudaStream_t);
template void rows<unsigned char, int, int>(PtrStepSzb, void*, int, cudaStream_t);
template void rows<unsigned char, float, float>(PtrStepSzb, void*, int, cudaStream_t);
template void rows<unsigned char, double, double>(PtrStepSzb, void*, int, cudaStream_t);

// T = unsigned short
template void rows<unsigned short, int, unsigned short>(PtrStepSzb, void*, int, cudaStream_t);
template void rows<unsigned short, int, int>(PtrStepSzb, void*, int, cudaStream_t);
template void rows<unsigned short, float, float>(PtrStepSzb, void*, int, cudaStream_t);
template void rows<unsigned short, double, double>(PtrStepSzb, void*, int, cudaStream_t);

// T = short
template void rows<short, int, short>(PtrStepSzb, void*, int, cudaStream_t);
template void rows<short, int, int>(PtrStepSzb, void*, int, cudaStream_t);
template void rows<short, float, float>(PtrStepSzb, void*, int, cudaStream_t);
template void rows<short, double, double>(PtrStepSzb, void*, int, cudaStream_t);

// T = int
template void rows<int, int, int>(PtrStepSzb, void*, int, cudaStream_t);
template void rows<int, float, float>(PtrStepSzb, void*, int, cudaStream_t);
template void rows<int, double, double>(PtrStepSzb, void*, int, cudaStream_t);

// T = float
template void rows<float, float, float>(PtrStepSzb, void*, int, cudaStream_t);
template void rows<float, double, double>(PtrStepSzb, void*, int, cudaStream_t);

// T = double
template void rows<double, double, double>(PtrStepSzb, void*, int, cudaStream_t);
||||
|
||||
/////////////////////////////////////////////////////////// |
||||
|
||||
template <int BLOCK_SIZE, typename T, typename S, typename D, int cn, class Op> |
||||
__global__ void colsKernel(const PtrStepSz<typename TypeVec<T, cn>::vec_type> src, typename TypeVec<D, cn>::vec_type* dst, const Op op) |
||||
{ |
||||
typedef typename TypeVec<T, cn>::vec_type src_type; |
||||
typedef typename TypeVec<S, cn>::vec_type work_type; |
||||
typedef typename TypeVec<D, cn>::vec_type dst_type; |
||||
|
||||
__shared__ S smem[BLOCK_SIZE * cn]; |
||||
|
||||
const int y = blockIdx.x; |
||||
|
||||
const src_type* srcRow = src.ptr(y); |
||||
|
||||
work_type myVal = op.template startValue<work_type>(); |
||||
|
||||
for (int x = threadIdx.x; x < src.cols; x += BLOCK_SIZE) |
||||
myVal = op(myVal, saturate_cast<work_type>(srcRow[x])); |
||||
|
||||
cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(myVal), threadIdx.x, detail::Unroll<cn>::op(op)); |
||||
|
||||
if (threadIdx.x == 0) |
||||
dst[y] = saturate_cast<dst_type>(op.result(myVal, src.cols)); |
||||
} |
||||
|
||||
// Host-side launcher for colsKernel with the reduction functor Op.
//
// src    - byte view of the input matrix, reinterpreted as a view of cn-channel T vectors.
// dst    - untyped output pointer, reinterpreted as a pointer to cn-channel D vectors.
// stream - CUDA stream used for the launch. When it is the default stream (0)
//          the function also waits for the device before returning.
template <typename T, typename S, typename D, int cn, class Op> void colsCaller(PtrStepSzb src, void* dst, cudaStream_t stream)
{
    typedef typename TypeVec<T, cn>::vec_type src_vec;
    typedef typename TypeVec<D, cn>::vec_type dst_vec;

    const int BLOCK_SIZE = 256;

    // One block of BLOCK_SIZE threads per source row.
    const dim3 threads(BLOCK_SIZE);
    const dim3 blocks(src.rows);

    Op reduceOp;
    colsKernel<BLOCK_SIZE, T, S, D, cn, Op><<<blocks, threads, 0, stream>>>(
        static_cast<PtrStepSz<src_vec> >(src),
        static_cast<dst_vec*>(dst),
        reduceOp);

    // Check for launch errors immediately.
    cudaSafeCall( cudaGetLastError() );

    const bool onDefaultStream = (stream == 0);
    if (onDefaultStream)
    {
        cudaSafeCall( cudaDeviceSynchronize() );
    }
}
||||
|
||||
template <typename T, typename S, typename D> void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream) |
||||
{ |
||||
typedef void (*func_t)(PtrStepSzb src, void* dst, cudaStream_t stream); |
||||
static const func_t funcs[5][4] = |
||||
{ |
||||
{0,0,0,0}, |
||||
{colsCaller<T, S, D, 1, Sum>, colsCaller<T, S, D, 1, Avg>, colsCaller<T, S, D, 1, Max>, colsCaller<T, S, D, 1, Min>}, |
||||
{colsCaller<T, S, D, 2, Sum>, colsCaller<T, S, D, 2, Avg>, colsCaller<T, S, D, 2, Max>, colsCaller<T, S, D, 2, Min>}, |
||||
{colsCaller<T, S, D, 3, Sum>, colsCaller<T, S, D, 3, Avg>, colsCaller<T, S, D, 3, Max>, colsCaller<T, S, D, 3, Min>}, |
||||
{colsCaller<T, S, D, 4, Sum>, colsCaller<T, S, D, 4, Avg>, colsCaller<T, S, D, 4, Max>, colsCaller<T, S, D, 4, Min>}, |
||||
}; |
||||
|
||||
funcs[cn][op](src, dst, stream); |
||||
} |
||||
|
||||
template void cols<unsigned char, int, unsigned char>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
template void cols<unsigned char, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
template void cols<unsigned char, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
template void cols<unsigned char, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
|
||||
template void cols<unsigned short, int, unsigned short>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
template void cols<unsigned short, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
template void cols<unsigned short, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
template void cols<unsigned short, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
|
||||
template void cols<short, int, short>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
template void cols<short, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
template void cols<short, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
template void cols<short, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
|
||||
template void cols<int, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
template void cols<int, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
template void cols<int, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
|
||||
template void cols<float, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
template void cols<float, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
|
||||
template void cols<double, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif /* CUDA_DISABLER */ |
@ -0,0 +1,185 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
struct VSub4 : binary_function<uint, uint, uint> |
||||
{ |
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const |
||||
{ |
||||
return vsub4(a, b); |
||||
} |
||||
|
||||
__device__ __forceinline__ VSub4() {} |
||||
__device__ __forceinline__ VSub4(const VSub4& other) {} |
||||
}; |
||||
|
||||
struct VSub2 : binary_function<uint, uint, uint> |
||||
{ |
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const |
||||
{ |
||||
return vsub2(a, b); |
||||
} |
||||
|
||||
__device__ __forceinline__ VSub2() {} |
||||
__device__ __forceinline__ VSub2(const VSub2& other) {} |
||||
}; |
||||
|
||||
template <typename T, typename D> struct SubMat : binary_function<T, T, D> |
||||
{ |
||||
__device__ __forceinline__ D operator ()(T a, T b) const |
||||
{ |
||||
return saturate_cast<D>(a - b); |
||||
} |
||||
|
||||
__device__ __forceinline__ SubMat() {} |
||||
__device__ __forceinline__ SubMat(const SubMat& other) {} |
||||
}; |
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <> struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> |
||||
{ |
||||
}; |
||||
|
||||
template <> struct TransformFunctorTraits< arithm::VSub2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::SubMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
void subMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform(src1, src2, dst, VSub4(), WithOutMask(), stream); |
||||
} |
||||
|
||||
void subMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream) |
||||
{ |
||||
cudev::transform(src1, src2, dst, VSub2(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template <typename T, typename D> |
||||
void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) |
||||
{ |
||||
if (mask.data) |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), mask, stream); |
||||
else |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), WithOutMask(), stream); |
||||
} |
||||
|
||||
template void subMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<uchar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<uchar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<uchar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
template void subMat<schar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<schar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<schar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<schar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<schar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<schar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<schar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void subMat<ushort, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<ushort, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<ushort, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<ushort, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<ushort, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<ushort, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<ushort, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void subMat<short, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<short, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<short, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<short, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<short, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<short, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<short, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void subMat<int, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<int, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<int, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<int, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<int, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<int, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void subMat<float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void subMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,148 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T, typename S, typename D> struct SubScalar : unary_function<T, D> |
||||
{ |
||||
S val; |
||||
|
||||
explicit SubScalar(S val_) : val(val_) {} |
||||
|
||||
__device__ __forceinline__ D operator ()(T a) const |
||||
{ |
||||
return saturate_cast<D>(a - val); |
||||
} |
||||
}; |
||||
} |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::SubScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <typename T, typename S, typename D> |
||||
void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) |
||||
{ |
||||
SubScalar<T, S, D> op(static_cast<S>(val)); |
||||
|
||||
if (mask.data) |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream); |
||||
else |
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream); |
||||
} |
||||
|
||||
template void subScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
template void subScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void subScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void subScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void subScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void subScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
|
||||
//template void subScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
//template void subScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
template void subScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); |
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,380 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/vec_traits.hpp" |
||||
#include "opencv2/core/cuda/vec_math.hpp" |
||||
#include "opencv2/core/cuda/reduce.hpp" |
||||
#include "opencv2/core/cuda/emulation.hpp" |
||||
#include "opencv2/core/cuda/utility.hpp" |
||||
|
||||
#include "unroll_detail.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace sum |
||||
{ |
||||
// Device-side counter of thread blocks that have stored their partial result.
// Used only by the `__CUDA_ARCH__ < 200` path of GlobalReduce::run, where thread 0 of
// each block increments it atomically and the last block to arrive resets it to 0.
__device__ unsigned int blocks_finished = 0; |
||||
|
||||
template <typename R, int cn> struct AtomicAdd; |
||||
template <typename R> struct AtomicAdd<R, 1> |
||||
{ |
||||
static __device__ void run(R* ptr, R val) |
||||
{ |
||||
Emulation::glob::atomicAdd(ptr, val); |
||||
} |
||||
}; |
||||
template <typename R> struct AtomicAdd<R, 2> |
||||
{ |
||||
typedef typename TypeVec<R, 2>::vec_type val_type; |
||||
|
||||
static __device__ void run(R* ptr, val_type val) |
||||
{ |
||||
Emulation::glob::atomicAdd(ptr, val.x); |
||||
Emulation::glob::atomicAdd(ptr + 1, val.y); |
||||
} |
||||
}; |
||||
template <typename R> struct AtomicAdd<R, 3> |
||||
{ |
||||
typedef typename TypeVec<R, 3>::vec_type val_type; |
||||
|
||||
static __device__ void run(R* ptr, val_type val) |
||||
{ |
||||
Emulation::glob::atomicAdd(ptr, val.x); |
||||
Emulation::glob::atomicAdd(ptr + 1, val.y); |
||||
Emulation::glob::atomicAdd(ptr + 2, val.z); |
||||
} |
||||
}; |
||||
template <typename R> struct AtomicAdd<R, 4> |
||||
{ |
||||
typedef typename TypeVec<R, 4>::vec_type val_type; |
||||
|
||||
static __device__ void run(R* ptr, val_type val) |
||||
{ |
||||
Emulation::glob::atomicAdd(ptr, val.x); |
||||
Emulation::glob::atomicAdd(ptr + 1, val.y); |
||||
Emulation::glob::atomicAdd(ptr + 2, val.z); |
||||
Emulation::glob::atomicAdd(ptr + 3, val.w); |
||||
} |
||||
}; |
||||
|
||||
template <int BLOCK_SIZE, typename R, int cn> |
||||
struct GlobalReduce |
||||
{ |
||||
typedef typename TypeVec<R, cn>::vec_type result_type; |
||||
|
||||
static __device__ void run(result_type& sum, result_type* result, int tid, int bid, R* smem) |
||||
{ |
||||
#if __CUDA_ARCH__ >= 200 |
||||
if (tid == 0) |
||||
AtomicAdd<R, cn>::run((R*) result, sum); |
||||
#else |
||||
__shared__ bool is_last; |
||||
|
||||
if (tid == 0) |
||||
{ |
||||
result[bid] = sum; |
||||
|
||||
__threadfence(); |
||||
|
||||
unsigned int ticket = ::atomicAdd(&blocks_finished, 1); |
||||
is_last = (ticket == gridDim.x * gridDim.y - 1); |
||||
} |
||||
|
||||
__syncthreads(); |
||||
|
||||
if (is_last) |
||||
{ |
||||
sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0); |
||||
|
||||
cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>())); |
||||
|
||||
if (tid == 0) |
||||
{ |
||||
result[0] = sum; |
||||
blocks_finished = 0; |
||||
} |
||||
} |
||||
#endif |
||||
} |
||||
}; |
||||
|
||||
template <int BLOCK_SIZE, typename src_type, typename result_type, class Mask, class Op> |
||||
__global__ void kernel(const PtrStepSz<src_type> src, result_type* result, const Mask mask, const Op op, const int twidth, const int theight) |
||||
{ |
||||
typedef typename VecTraits<src_type>::elem_type T; |
||||
typedef typename VecTraits<result_type>::elem_type R; |
||||
const int cn = VecTraits<src_type>::cn; |
||||
|
||||
__shared__ R smem[BLOCK_SIZE * cn]; |
||||
|
||||
const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x; |
||||
const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y; |
||||
|
||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x; |
||||
const int bid = blockIdx.y * gridDim.x + blockIdx.x; |
||||
|
||||
result_type sum = VecTraits<result_type>::all(0); |
||||
|
||||
for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y) |
||||
{ |
||||
const src_type* ptr = src.ptr(y); |
||||
|
||||
for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x) |
||||
{ |
||||
if (mask(y, x)) |
||||
{ |
||||
const src_type srcVal = ptr[x]; |
||||
sum = sum + op(saturate_cast<result_type>(srcVal)); |
||||
} |
||||
} |
||||
} |
||||
|
||||
cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>())); |
||||
|
||||
GlobalReduce<BLOCK_SIZE, R, cn>::run(sum, result, tid, bid, smem); |
||||
} |
||||
|
||||
const int threads_x = 32; |
||||
const int threads_y = 8; |
||||
|
||||
void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid) |
||||
{ |
||||
block = dim3(threads_x, threads_y); |
||||
|
||||
grid = dim3(divUp(cols, block.x * block.y), |
||||
divUp(rows, block.y * block.x)); |
||||
|
||||
grid.x = ::min(grid.x, block.x); |
||||
grid.y = ::min(grid.y, block.y); |
||||
} |
||||
|
||||
void getBufSize(int cols, int rows, int cn, int& bufcols, int& bufrows) |
||||
{ |
||||
dim3 block, grid; |
||||
getLaunchCfg(cols, rows, block, grid); |
||||
|
||||
bufcols = grid.x * grid.y * sizeof(double) * cn; |
||||
bufrows = 1; |
||||
} |
||||
|
||||
template <typename T, typename R, int cn, template <typename> class Op> |
||||
void caller(PtrStepSzb src_, void* buf_, double* out, PtrStepSzb mask) |
||||
{ |
||||
typedef typename TypeVec<T, cn>::vec_type src_type; |
||||
typedef typename TypeVec<R, cn>::vec_type result_type; |
||||
|
||||
PtrStepSz<src_type> src(src_); |
||||
result_type* buf = (result_type*) buf_; |
||||
|
||||
dim3 block, grid; |
||||
getLaunchCfg(src.cols, src.rows, block, grid); |
||||
|
||||
const int twidth = divUp(divUp(src.cols, grid.x), block.x); |
||||
const int theight = divUp(divUp(src.rows, grid.y), block.y); |
||||
|
||||
Op<result_type> op; |
||||
|
||||
if (mask.data) |
||||
kernel<threads_x * threads_y><<<grid, block>>>(src, buf, SingleMask(mask), op, twidth, theight); |
||||
else |
||||
kernel<threads_x * threads_y><<<grid, block>>>(src, buf, WithOutMask(), op, twidth, theight); |
||||
cudaSafeCall( cudaGetLastError() ); |
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); |
||||
|
||||
R result[4] = {0, 0, 0, 0}; |
||||
cudaSafeCall( cudaMemcpy(&result, buf, sizeof(result_type), cudaMemcpyDeviceToHost) ); |
||||
|
||||
out[0] = result[0]; |
||||
out[1] = result[1]; |
||||
out[2] = result[2]; |
||||
out[3] = result[3]; |
||||
} |
||||
|
||||
template <typename T> struct SumType; |
||||
template <> struct SumType<uchar> { typedef unsigned int R; }; |
||||
template <> struct SumType<schar> { typedef int R; }; |
||||
template <> struct SumType<ushort> { typedef unsigned int R; }; |
||||
template <> struct SumType<short> { typedef int R; }; |
||||
template <> struct SumType<int> { typedef int R; }; |
||||
template <> struct SumType<float> { typedef float R; }; |
||||
template <> struct SumType<double> { typedef double R; }; |
||||
|
||||
template <typename T, int cn> |
||||
void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask) |
||||
{ |
||||
typedef typename SumType<T>::R R; |
||||
caller<T, R, cn, identity>(src, buf, out, mask); |
||||
} |
||||
|
||||
// Explicit instantiations of run<T, cn> for every supported element type and
// 1 to 4 channels.
template void run<uchar, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<uchar, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<uchar, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<uchar, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void run<schar, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<schar, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<schar, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<schar, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void run<ushort, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<ushort, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<ushort, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<ushort, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void run<short, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<short, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<short, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<short, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void run<int, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<int, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<int, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<int, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void run<float, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<float, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<float, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<float, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void run<double, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<double, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<double, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void run<double, 4>(PtrStepSzb, void*, double*, PtrStepSzb);
||||
|
||||
template <typename T, int cn> |
||||
void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask) |
||||
{ |
||||
typedef typename SumType<T>::R R; |
||||
caller<T, R, cn, abs_func>(src, buf, out, mask); |
||||
} |
||||
|
||||
// Explicit instantiations of runAbs<T, cn> for every supported element type
// and 1 to 4 channels.
template void runAbs<uchar, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<uchar, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<uchar, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<uchar, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void runAbs<schar, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<schar, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<schar, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<schar, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void runAbs<ushort, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<ushort, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<ushort, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<ushort, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void runAbs<short, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<short, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<short, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<short, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void runAbs<int, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<int, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<int, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<int, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void runAbs<float, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<float, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<float, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<float, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void runAbs<double, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<double, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<double, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runAbs<double, 4>(PtrStepSzb, void*, double*, PtrStepSzb);
||||
|
||||
// Unary device functor returning its argument multiplied by itself.
// Used by runSqr() as the per-pixel operation.
template <typename T> struct Sqr : unary_function<T, T>
{
    __device__ __forceinline__ T operator ()(T x) const
    {
        const T squared = x * x;
        return squared;
    }
};
||||
|
||||
template <typename T, int cn> |
||||
void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask) |
||||
{ |
||||
caller<T, double, cn, Sqr>(src, buf, out, mask); |
||||
} |
||||
|
||||
// Explicit instantiations of runSqr<T, cn> for every supported element type
// and 1 to 4 channels.
template void runSqr<uchar, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<uchar, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<uchar, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<uchar, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void runSqr<schar, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<schar, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<schar, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<schar, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void runSqr<ushort, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<ushort, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<ushort, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<ushort, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void runSqr<short, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<short, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<short, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<short, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void runSqr<int, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<int, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<int, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<int, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void runSqr<float, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<float, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<float, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<float, 4>(PtrStepSzb, void*, double*, PtrStepSzb);

template void runSqr<double, 1>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<double, 2>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<double, 3>(PtrStepSzb, void*, double*, PtrStepSzb);
template void runSqr<double, 4>(PtrStepSzb, void*, double*, PtrStepSzb);
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,114 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/functional.hpp" |
||||
#include "opencv2/core/cuda/transform.hpp" |
||||
#include "opencv2/core/cuda/saturate_cast.hpp" |
||||
#include "opencv2/core/cuda/simd_functions.hpp" |
||||
|
||||
#include "arithm_func_traits.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace cv { namespace gpu { namespace cudev |
||||
{ |
||||
template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T> struct TransformFunctorTraits< thresh_binary_inv_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T> struct TransformFunctorTraits< thresh_trunc_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T> struct TransformFunctorTraits< thresh_to_zero_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
|
||||
template <typename T> struct TransformFunctorTraits< thresh_to_zero_inv_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)> |
||||
{ |
||||
}; |
||||
}}} |
||||
|
||||
namespace arithm |
||||
{ |
||||
template <template <typename> class Op, typename T> |
||||
void threshold_caller(PtrStepSz<T> src, PtrStepSz<T> dst, T thresh, T maxVal, cudaStream_t stream) |
||||
{ |
||||
Op<T> op(thresh, maxVal); |
||||
cudev::transform(src, dst, op, WithOutMask(), stream); |
||||
} |
||||
|
||||
template <typename T> |
||||
void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream) |
||||
{ |
||||
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> dst, T thresh, T maxVal, cudaStream_t stream); |
||||
|
||||
static const caller_t callers[] = |
||||
{ |
||||
threshold_caller<thresh_binary_func, T>, |
||||
threshold_caller<thresh_binary_inv_func, T>, |
||||
threshold_caller<thresh_trunc_func, T>, |
||||
threshold_caller<thresh_to_zero_func, T>, |
||||
threshold_caller<thresh_to_zero_inv_func, T> |
||||
}; |
||||
|
||||
callers[type]((PtrStepSz<T>) src, (PtrStepSz<T>) dst, static_cast<T>(thresh), static_cast<T>(maxVal), stream); |
||||
} |
||||
|
||||
// Explicit instantiations of threshold<T> for every supported element type.
template void threshold<uchar>(PtrStepSzb, PtrStepSzb, double, double, int, cudaStream_t);
template void threshold<schar>(PtrStepSzb, PtrStepSzb, double, double, int, cudaStream_t);
template void threshold<ushort>(PtrStepSzb, PtrStepSzb, double, double, int, cudaStream_t);
template void threshold<short>(PtrStepSzb, PtrStepSzb, double, double, int, cudaStream_t);
template void threshold<int>(PtrStepSzb, PtrStepSzb, double, double, int, cudaStream_t);
template void threshold<float>(PtrStepSzb, PtrStepSzb, double, double, int, cudaStream_t);
template void threshold<double>(PtrStepSzb, PtrStepSzb, double, double, int, cudaStream_t);
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,122 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other materials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors "as is" and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include "opencv2/core/cuda/common.hpp" |
||||
|
||||
using namespace cv::gpu; |
||||
using namespace cv::gpu::cudev; |
||||
|
||||
namespace arithm |
||||
{ |
||||
const int TRANSPOSE_TILE_DIM = 16; |
||||
const int TRANSPOSE_BLOCK_ROWS = 16; |
||||
|
||||
template <typename T> |
||||
__global__ void transposeKernel(const PtrStepSz<T> src, PtrStep<T> dst) |
||||
{ |
||||
__shared__ T tile[TRANSPOSE_TILE_DIM][TRANSPOSE_TILE_DIM + 1]; |
||||
|
||||
int blockIdx_x, blockIdx_y; |
||||
|
||||
// do diagonal reordering |
||||
if (gridDim.x == gridDim.y) |
||||
{ |
||||
blockIdx_y = blockIdx.x; |
||||
blockIdx_x = (blockIdx.x + blockIdx.y) % gridDim.x; |
||||
} |
||||
else |
||||
{ |
||||
int bid = blockIdx.x + gridDim.x * blockIdx.y; |
||||
blockIdx_y = bid % gridDim.y; |
||||
blockIdx_x = ((bid / gridDim.y) + blockIdx_y) % gridDim.x; |
||||
} |
||||
|
||||
int xIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.x; |
||||
int yIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.y; |
||||
|
||||
if (xIndex < src.cols) |
||||
{ |
||||
for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS) |
||||
{ |
||||
if (yIndex + i < src.rows) |
||||
{ |
||||
tile[threadIdx.y + i][threadIdx.x] = src(yIndex + i, xIndex); |
||||
} |
||||
} |
||||
} |
||||
|
||||
__syncthreads(); |
||||
|
||||
xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x; |
||||
yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y; |
||||
|
||||
if (xIndex < src.rows) |
||||
{ |
||||
for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS) |
||||
{ |
||||
if (yIndex + i < src.cols) |
||||
{ |
||||
dst(yIndex + i, xIndex) = tile[threadIdx.x][threadIdx.y + i]; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Host launcher for transposeKernel: one TRANSPOSE_TILE_DIM-square thread block
// per tile of src, enough blocks to cover src.cols x src.rows.
// Blocks until the device is idle only when stream is the default stream (0).
template <typename T> void transpose(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
{
    const dim3 block(TRANSPOSE_TILE_DIM, TRANSPOSE_TILE_DIM);
    const dim3 grid(divUp(src.cols, block.x),
                    divUp(src.rows, block.y));

    transposeKernel<<<grid, block, 0, stream>>>(src, dst);
    cudaSafeCall( cudaGetLastError() );

    const bool on_default_stream = (stream == 0);
    if (on_default_stream)
    {
        cudaSafeCall( cudaDeviceSynchronize() );
    }
}
||||
|
||||
// Explicit instantiations of transpose<T>; only int and double are provided here.
template void transpose<int>(PtrStepSz<int>, PtrStepSz<int>, cudaStream_t);
template void transpose<double>(PtrStepSz<double>, PtrStepSz<double>, cudaStream_t);
||||
} |
||||
|
||||
#endif // CUDA_DISABLER |
@ -0,0 +1,135 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __UNROLL_DETAIL_HPP__ |
||||
#define __UNROLL_DETAIL_HPP__ |
||||
|
||||
#include <thrust/tuple.h> |
||||
#include "opencv2/core/cuda/common.hpp" |
||||
#include "opencv2/core/cuda/vec_traits.hpp" |
||||
|
||||
namespace detail |
||||
{ |
||||
template <int cn> struct Unroll; |
||||
template <> struct Unroll<1> |
||||
{ |
||||
template <int BLOCK_SIZE, typename R> |
||||
static __device__ __forceinline__ volatile R* smem_tuple(R* smem) |
||||
{ |
||||
return smem; |
||||
} |
||||
|
||||
template <typename R> |
||||
static __device__ __forceinline__ R& tie(R& val) |
||||
{ |
||||
return val; |
||||
} |
||||
|
||||
template <class Op> |
||||
static __device__ __forceinline__ const Op& op(const Op& op) |
||||
{ |
||||
return op; |
||||
} |
||||
}; |
||||
template <> struct Unroll<2> |
||||
{ |
||||
template <int BLOCK_SIZE, typename R> |
||||
static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*> smem_tuple(R* smem) |
||||
{ |
||||
return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE); |
||||
} |
||||
|
||||
template <typename R> |
||||
static __device__ __forceinline__ thrust::tuple<typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&> tie(R& val) |
||||
{ |
||||
return thrust::tie(val.x, val.y); |
||||
} |
||||
|
||||
template <class Op> |
||||
static __device__ __forceinline__ const thrust::tuple<Op, Op> op(const Op& op) |
||||
{ |
||||
return thrust::make_tuple(op, op); |
||||
} |
||||
}; |
||||
template <> struct Unroll<3> |
||||
{ |
||||
template <int BLOCK_SIZE, typename R> |
||||
static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*> smem_tuple(R* smem) |
||||
{ |
||||
return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE); |
||||
} |
||||
|
||||
template <typename R> |
||||
static __device__ __forceinline__ thrust::tuple<typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&> tie(R& val) |
||||
{ |
||||
return thrust::tie(val.x, val.y, val.z); |
||||
} |
||||
|
||||
template <class Op> |
||||
static __device__ __forceinline__ const thrust::tuple<Op, Op, Op> op(const Op& op) |
||||
{ |
||||
return thrust::make_tuple(op, op, op); |
||||
} |
||||
}; |
||||
template <> struct Unroll<4> |
||||
{ |
||||
template <int BLOCK_SIZE, typename R> |
||||
static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*, volatile R*> smem_tuple(R* smem) |
||||
{ |
||||
return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE); |
||||
} |
||||
|
||||
template <typename R> |
||||
static __device__ __forceinline__ thrust::tuple<typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&> tie(R& val) |
||||
{ |
||||
return thrust::tie(val.x, val.y, val.z, val.w); |
||||
} |
||||
|
||||
template <class Op> |
||||
static __device__ __forceinline__ const thrust::tuple<Op, Op, Op, Op> op(const Op& op) |
||||
{ |
||||
return thrust::make_tuple(op, op, op, op); |
||||
} |
||||
}; |
||||
} |
||||
|
||||
#endif // __UNROLL_DETAIL_HPP__
|
@ -0,0 +1,43 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp" |
@ -0,0 +1,58 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_PRECOMP_H__ |
||||
#define __OPENCV_PRECOMP_H__ |
||||
|
||||
#include <limits> |
||||
|
||||
#include "opencv2/gpuarithm.hpp" |
||||
#include "opencv2/core/utility.hpp" |
||||
#include "opencv2/core/core_c.h" |
||||
|
||||
#include "opencv2/core/gpu_private.hpp" |
||||
|
||||
#ifdef HAVE_CUBLAS |
||||
#include <cublas.h> |
||||
#endif |
||||
|
||||
#endif /* __OPENCV_PRECOMP_H__ */ |
@ -0,0 +1,120 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp" |
||||
|
||||
#ifdef HAVE_CUDA |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace cv::gpu; |
||||
using namespace cvtest; |
||||
using namespace testing; |
||||
|
||||
int main(int argc, char** argv) |
||||
{ |
||||
try |
||||
{ |
||||
const std::string keys = |
||||
"{ h help ? | | Print help}" |
||||
"{ i info | | Print information about system and exit }" |
||||
"{ device | -1 | Device on which tests will be executed (-1 means all devices) }" |
||||
; |
||||
|
||||
CommandLineParser cmd(argc, (const char**)argv, keys); |
||||
|
||||
if (cmd.has("help")) |
||||
{ |
||||
cmd.printMessage(); |
||||
return 0; |
||||
} |
||||
|
||||
printCudaInfo(); |
||||
|
||||
if (cmd.has("info")) |
||||
{ |
||||
return 0; |
||||
} |
||||
|
||||
int device = cmd.get<int>("device"); |
||||
if (device < 0) |
||||
{ |
||||
DeviceManager::instance().loadAll(); |
||||
|
||||
cout << "Run tests on all supported devices \n" << endl; |
||||
} |
||||
else |
||||
{ |
||||
DeviceManager::instance().load(device); |
||||
|
||||
DeviceInfo info(device); |
||||
cout << "Run tests on device " << device << " [" << info.name() << "] \n" << endl; |
||||
} |
||||
|
||||
TS::ptr()->init("gpu"); |
||||
InitGoogleTest(&argc, argv); |
||||
|
||||
return RUN_ALL_TESTS(); |
||||
} |
||||
catch (const exception& e) |
||||
{ |
||||
cerr << e.what() << endl; |
||||
return -1; |
||||
} |
||||
catch (...) |
||||
{ |
||||
cerr << "Unknown error" << endl; |
||||
return -1; |
||||
} |
||||
|
||||
return 0; |
||||
} |
||||
|
||||
#else // HAVE_CUDA
|
||||
|
||||
// Fallback entry point used when HAVE_CUDA is not defined: prints a notice
// and exits with status 0 without running any tests.
int main()
{
    const char* const notice = "OpenCV was built without CUDA support\n";

    printf("%s", notice);

    return 0;
}
||||
|
||||
#endif // HAVE_CUDA
|
@ -0,0 +1,43 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp" |
@ -0,0 +1,60 @@ |
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifdef __GNUC__ |
||||
# pragma GCC diagnostic ignored "-Wmissing-declarations" |
||||
# if defined __clang__ || defined __APPLE__ |
||||
# pragma GCC diagnostic ignored "-Wmissing-prototypes" |
||||
# pragma GCC diagnostic ignored "-Wextra" |
||||
# endif |
||||
#endif |
||||
|
||||
#ifndef __OPENCV_TEST_PRECOMP_HPP__ |
||||
#define __OPENCV_TEST_PRECOMP_HPP__ |
||||
|
||||
#include "opencv2/ts.hpp" |
||||
#include "opencv2/ts/gpu_test.hpp" |
||||
|
||||
#include "opencv2/core.hpp" |
||||
#include "opencv2/gpuarithm.hpp" |
||||
|
||||
#endif |
@ -1,3 +1,3 @@ |
||||
set(the_description "Images stitching") |
||||
ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect OPTIONAL opencv_gpu opencv_nonfree) |
||||
ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect OPTIONAL opencv_gpu opencv_gpuarithm opencv_nonfree) |
||||
|
||||
|
Loading…
Reference in new issue