diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp index 0d4fae35e4..d182fd0d43 100644 --- a/modules/gpu/src/matrix_reductions.cpp +++ b/modules/gpu/src/matrix_reductions.cpp @@ -66,6 +66,42 @@ int cv::gpu::countNonZero(const GpuMat&, GpuMat&) { throw_nogpu(); return 0; } #else +namespace +{ + class DeviceBuffer + { + public: + explicit DeviceBuffer(int count_ = 1) : count(count_) + { + cudaSafeCall( cudaMalloc(&pdev, count * sizeof(double)) ); + } + ~DeviceBuffer() + { + cudaSafeCall( cudaFree(pdev) ); + } + + operator double*() {return pdev;} + + void download(double* hptr) + { + double hbuf; + cudaSafeCall( cudaMemcpy(&hbuf, pdev, sizeof(double), cudaMemcpyDeviceToHost) ); + *hptr = hbuf; + } + void download(double** hptrs) + { + AutoBuffer hbuf(count); + cudaSafeCall( cudaMemcpy((void*)hbuf, pdev, count * sizeof(double), cudaMemcpyDeviceToHost) ); + for (int i = 0; i < count; ++i) + *hptrs[i] = hbuf[i]; + } + + private: + double* pdev; + int count; + }; +} + //////////////////////////////////////////////////////////////////////// // meanStdDev @@ -80,18 +116,14 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev) #if NPP_VERSION_MAJOR >= 4 - GpuMat d_buf(1, 2, CV_64F); + DeviceBuffer dbuf(2); - nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr(), src.step, sz, d_buf.ptr(), d_buf.ptr() + 1) ); + nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr(), src.step, sz, dbuf, (double*)dbuf + 1) ); cudaSafeCall( cudaThreadSynchronize() ); - - double buf[2]; - - Mat _buf(1, 2, CV_64F, buf); - d_buf.download(_buf); - mean[0] = buf[0]; - stddev[0] = buf[1]; + + double* ptrs[2] = {mean.val, stddev.val}; + dbuf.download(ptrs); #else @@ -150,27 +182,22 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) sz.height = src1.rows; int funcIdx = normType >> 1; + + double retVal; #if NPP_VERSION_MAJOR >= 4 - GpuMat d_buf(1, 1, CV_64F); + DeviceBuffer dbuf; - nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr(), src1.step, - src2.ptr(), src2.step, - sz, d_buf.ptr()) ); + nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr(), src1.step, src2.ptr(), src2.step, sz, dbuf) ); cudaSafeCall( cudaThreadSynchronize() ); - double retVal; - Mat _buf(1, 1, CV_64F, &retVal); - d_buf.download(_buf); + dbuf.download(&retVal); #else - double retVal; - nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr(), src1.step, - src2.ptr(), src2.step, - sz, &retVal) ); + nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr(), src1.step, src2.ptr(), src2.step, sz, &retVal) ); cudaSafeCall( cudaThreadSynchronize() );