From ec70282bf75341db1d60c9f4a19328d716709044 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 15:23:44 +0400 Subject: [PATCH] switched to Input/Output Array in min/max operations --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 18 +- modules/gpuarithm/src/element_operations.cpp | 247 ++++++------------ modules/gpuimgproc/src/hough.cpp | 2 +- modules/nonfree/src/surf_gpu.cpp | 2 +- 4 files changed, 82 insertions(+), 187 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 943b3a1d8a..42d69ef946 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -115,6 +115,12 @@ CV_EXPORTS void rshift(InputArray src, Scalar_ val, OutputArray dst, Stream //! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth CV_EXPORTS void lshift(InputArray src, Scalar_ val, OutputArray dst, Stream& stream = Stream::Null()); +//! computes per-element minimum of two arrays (dst = min(src1, src2)) +CV_EXPORTS void min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); + +//! computes per-element maximum of two arrays (dst = max(src1, src2)) +CV_EXPORTS void max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); + //! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype = -1, Stream& stream = Stream::Null()); @@ -125,18 +131,6 @@ static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2 addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); } -//! computes per-element minimum of two arrays (dst = min(src1, src2)) -CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element minimum of array and scalar (dst = min(src1, src2)) -CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element maximum of two arrays (dst = max(src1, src2)) -CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element maximum of array and scalar (dst = max(src1, src2)) -CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()); - //! implements generalized matrix product algorithm GEMM from BLAS CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 5a0f206aed..425b699a04 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -48,46 +48,30 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) void cv::gpu::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); } - void cv::gpu::subtract(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); } - void cv::gpu::multiply(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); } - void cv::gpu::divide(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); } - void cv::gpu::absdiff(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::abs(InputArray, OutputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::sqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::sqrt(InputArray, OutputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::exp(InputArray, OutputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::log(InputArray, OutputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::pow(InputArray, double, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::compare(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } void cv::gpu::bitwise_not(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::bitwise_or(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::bitwise_and(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::bitwise_xor(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } void cv::gpu::rshift(InputArray, Scalar_, OutputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::lshift(InputArray, Scalar_, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); } - -void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::max(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::min(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); } +void cv::gpu::max(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::addWeighted(const GpuMat&, double, const GpuMat&, double, double, GpuMat&, int, Stream&) { throw_no_cuda(); } @@ -2262,6 +2246,15 @@ void cv::gpu::lshift(InputArray _src, Scalar_ val, OutputArray _dst, Stream ////////////////////////////////////////////////////////////////////////////// // Minimum and maximum operations +namespace +{ + enum + { + MIN_OP, + MAX_OP + }; +} + namespace arithm { void minMat_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); @@ -2275,116 +2268,49 @@ namespace arithm template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s) +void minMaxMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& _stream, int op) { using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = + static const func_t funcs[2][7] = { - minMat, - minMat, - minMat, - minMat, - minMat, - minMat, - minMat - }; - - const int depth = src1.depth(); - const int cn = src1.channels(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() ); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src1.size(), src1.type()); - - cudaStream_t stream = StreamAccessor::getStream(s); - - PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); - PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); - PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); - - if (depth == CV_8U || depth == CV_16U) - { - const intptr_t src1ptr = reinterpret_cast(src1_.data); - const intptr_t src2ptr = reinterpret_cast(src2_.data); - const intptr_t dstptr = reinterpret_cast(dst_.data); - - const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0; - - if (isAllAligned) { - if (depth == CV_8U && (src1_.cols & 3) == 0) - { - const int vcols = src1_.cols >> 2; - - minMat_v4(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); - - return; - } - else if (depth == CV_16U && (src1_.cols & 1) == 0) - { - const int vcols = src1_.cols >> 1; - - minMat_v2(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); - - return; - } + minMat, + minMat, + minMat, + minMat, + minMat, + minMat, + minMat + }, + { + maxMat, + maxMat, + maxMat, + maxMat, + maxMat, + maxMat, + maxMat } - } - - const func_t func = funcs[depth]; - - if (!func) - CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); - - func(src1_, src2_, dst_, stream); -} - -void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s) -{ - using namespace arithm; + }; - typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = + typedef void (*opt_func_t)(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); + static const opt_func_t funcs_v4[2] = + { + minMat_v4, maxMat_v4 + }; + static const opt_func_t funcs_v2[2] = { - maxMat, - maxMat, - maxMat, - maxMat, - maxMat, - maxMat, - maxMat + minMat_v2, maxMat_v2 }; const int depth = src1.depth(); const int cn = src1.channels(); CV_Assert( depth <= CV_64F ); - CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() ); - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src1.size(), src1.type()); - - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); @@ -2404,10 +2330,10 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s { const int vcols = src1_.cols >> 2; - maxMat_v4(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); + funcs_v4[op](PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), + stream); return; } @@ -2415,17 +2341,17 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s { const int vcols = src1_.cols >> 1; - maxMat_v2(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); + funcs_v2[op](PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), + stream); return; } } } - const func_t func = funcs[depth]; + const func_t func = funcs[op][depth]; if (!func) CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); @@ -2441,20 +2367,31 @@ namespace } } -void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) +void minMaxScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int op) { using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = + static const func_t funcs[2][7] = { - minScalar, - minScalar, - minScalar, - minScalar, - minScalar, - minScalar, - minScalar + { + minScalar, + minScalar, + minScalar, + minScalar, + minScalar, + minScalar, + minScalar + }, + { + maxScalar, + maxScalar, + maxScalar, + maxScalar, + maxScalar, + maxScalar, + maxScalar + } }; typedef double (*cast_func_t)(double sc); @@ -2468,53 +2405,17 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) CV_Assert( depth <= CV_64F ); CV_Assert( src.channels() == 1 ); - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src.size(), src.type()); - - funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); + funcs[op][depth](src, cast_func[depth](val[0]), dst, StreamAccessor::getStream(stream)); } -void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) +void cv::gpu::min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream) { - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = - { - maxScalar, - maxScalar, - maxScalar, - maxScalar, - maxScalar, - maxScalar, - maxScalar - }; - - typedef double (*cast_func_t)(double sc); - static const cast_func_t cast_func[] = - { - castScalar, castScalar, castScalar, castScalar, castScalar, castScalar, castScalar - }; - - const int depth = src.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src.channels() == 1 ); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src.size(), src.type()); + arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MIN_OP); +} - funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); +void cv::gpu::max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream) +{ + arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MAX_OP); } //////////////////////////////////////////////////////////////////////// diff --git a/modules/gpuimgproc/src/hough.cpp b/modules/gpuimgproc/src/hough.cpp index bc0a8a400d..15e5297623 100644 --- a/modules/gpuimgproc/src/hough.cpp +++ b/modules/gpuimgproc/src/hough.cpp @@ -761,7 +761,7 @@ namespace { buildRTable_gpu(edgePointList.ptr(0), edgePointList.ptr(1), edgePointList.cols, r_table, r_sizes.ptr(), make_short2(templCenter.x, templCenter.y), levels); - min(r_sizes, maxSize, r_sizes); + gpu::min(r_sizes, maxSize, r_sizes); } } diff --git a/modules/nonfree/src/surf_gpu.cpp b/modules/nonfree/src/surf_gpu.cpp index ace9bb53ab..82ade2927e 100644 --- a/modules/nonfree/src/surf_gpu.cpp +++ b/modules/nonfree/src/surf_gpu.cpp @@ -147,7 +147,7 @@ namespace if (use_mask) { - min(mask, 1.0, surf_.mask1); + gpu::min(mask, 1.0, surf_.mask1); gpu::integralBuffered(surf_.mask1, surf_.maskSum, surf_.intBuffer); maskOffset = bindMaskSumTex(surf_.maskSum); }