From 58e472754a424297883ac6a9f3e9306525ebcb00 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov
Date: Mon, 3 Jun 2013 13:36:02 +0400
Subject: [PATCH] fixed norm diff function (it uses pre-allocated buffer now)

---
 modules/gpu/src/matrix_reductions.cpp | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp
index 761abb525f..056e5ef701 100644
--- a/modules/gpu/src/matrix_reductions.cpp
+++ b/modules/gpu/src/matrix_reductions.cpp
@@ -187,10 +187,20 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
     CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
     CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
 
-    typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
-        NppiSize oSizeROI, Npp64f* pRetVal);
+#if CUDA_VERSION < 5050 // NPP norm-diff signature without a scratch-buffer argument
+    typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal);
 
-    static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+    static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+#else // NPP norm-diff signature that takes a caller-provided scratch buffer
+    typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
+        NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer);
+
+    typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize);
+
+    static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
+
+    static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R}; // same order as funcs[]
+#endif
 
     NppiSize sz;
     sz.width = src1.cols;
@@ -202,7 +212,16 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
 
     DeviceBuffer dbuf;
 
-    nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
+#if CUDA_VERSION < 5050
+    nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
+#else
+    int bufSize = 0; // scratch size reported by NPP; zero-initialized so it is never read unset
+    nppSafeCall( buf_size_funcs[funcIdx](sz, &bufSize) ); // check the status before bufSize sizes an allocation
+
+    GpuMat buf(1, bufSize, CV_8UC1); // one row of bufSize 8-bit elements used as the scratch buffer
+
+    nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf, buf.data) );
+#endif
 
     cudaSafeCall( cudaDeviceSynchronize() );
 