From 1c9f4e7ca24aeaf2b01f7600ba439a0661dd87b9 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 10 May 2011 12:39:12 +0000 Subject: [PATCH] fixed gpu::meanStdDev and gpu::norm under CUDA 4.0 fixed compilation under Win64 --- modules/gpu/src/matrix_operations.cpp | 10 ++++---- modules/gpu/src/matrix_reductions.cpp | 37 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp index 6467ee7880..e1c34dda86 100644 --- a/modules/gpu/src/matrix_operations.cpp +++ b/modules/gpu/src/matrix_operations.cpp @@ -596,11 +596,11 @@ bool cv::gpu::CudaMem::canMapHostMemory() namespace { - int alignUp(int what, int alignment) + size_t alignUpStep(size_t what, size_t alignment) { - int alignMask = alignment-1; - int inverseAlignMask = ~alignMask; - int res = (what + alignMask) & inverseAlignMask; + size_t alignMask = alignment-1; + size_t inverseAlignMask = ~alignMask; + size_t res = (what + alignMask) & inverseAlignMask; return res; } } @@ -626,7 +626,7 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type) { cudaDeviceProp prop; cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) ); - step = alignUp(step, prop.textureAlignment); + step = alignUpStep(step, prop.textureAlignment); } int64 _nettosize = (int64)step*rows; size_t nettosize = (size_t)_nettosize; diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp index 1cc8831a52..0d4fae35e4 100644 --- a/modules/gpu/src/matrix_reductions.cpp +++ b/modules/gpu/src/matrix_reductions.cpp @@ -78,9 +78,28 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev) sz.width = src.cols; sz.height = src.rows; +#if NPP_VERSION_MAJOR >= 4 + + GpuMat d_buf(1, 2, CV_64F); + + nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr(), src.step, sz, d_buf.ptr(), d_buf.ptr() + 1) ); + + cudaSafeCall( cudaThreadSynchronize() ); + + double buf[2]; + + Mat _buf(1, 2, CV_64F, buf); + d_buf.download(_buf); + mean[0] = buf[0]; + stddev[0] = buf[1]; + +#else + nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr(), src.step, sz, mean.val, stddev.val) ); cudaSafeCall( cudaThreadSynchronize() ); + +#endif } @@ -131,14 +150,32 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) sz.height = src1.rows; int funcIdx = normType >> 1; + +#if NPP_VERSION_MAJOR >= 4 + + GpuMat d_buf(1, 1, CV_64F); + + nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr(), src1.step, + src2.ptr(), src2.step, + sz, d_buf.ptr()) ); + + cudaSafeCall( cudaThreadSynchronize() ); + double retVal; + Mat _buf(1, 1, CV_64F, &retVal); + d_buf.download(_buf); + +#else + double retVal; nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr(), src1.step, src2.ptr(), src2.step, sz, &retVal) ); cudaSafeCall( cudaThreadSynchronize() ); +#endif + return retVal; }