From 6c253510495d25b8178f53e67c1b7d9655ab5fdf Mon Sep 17 00:00:00 2001 From: Tomoaki Teshima Date: Thu, 15 Mar 2018 18:40:05 +0900 Subject: [PATCH 1/2] make the asynchronous call to NPP safe * Stop calling nppSetStream --- modules/core/include/opencv2/core/private.cuda.hpp | 6 ++++-- modules/cudaarithm/src/reductions.cpp | 2 -- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/core/include/opencv2/core/private.cuda.hpp b/modules/core/include/opencv2/core/private.cuda.hpp index cea280b122..003ff126ba 100644 --- a/modules/core/include/opencv2/core/private.cuda.hpp +++ b/modules/core/include/opencv2/core/private.cuda.hpp @@ -108,6 +108,8 @@ static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The l #else // HAVE_CUDA +#define nppSafeSetStream(oldStream, newStream) { if(oldStream != newStream) { cudaStreamSynchronize(oldStream); nppSetStream(newStream); } } + static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); } namespace cv { namespace cuda @@ -139,13 +141,13 @@ namespace cv { namespace cuda inline explicit NppStreamHandler(Stream& newStream) { oldStream = nppGetStream(); - nppSetStream(StreamAccessor::getStream(newStream)); + nppSafeSetStream(oldStream, StreamAccessor::getStream(newStream)); } inline explicit NppStreamHandler(cudaStream_t newStream) { oldStream = nppGetStream(); - nppSetStream(newStream); + nppSafeSetStream(oldStream, newStream); } inline ~NppStreamHandler() diff --git a/modules/cudaarithm/src/reductions.cpp b/modules/cudaarithm/src/reductions.cpp index 50185b161e..1adf05fe1a 100644 --- a/modules/cudaarithm/src/reductions.cpp +++ b/modules/cudaarithm/src/reductions.cpp @@ -157,8 +157,6 @@ void cv::cuda::meanStdDev(InputArray _src, OutputArray _dst, Stream& stream) BufferPool pool(stream); GpuMat buf = pool.getBuffer(1, bufSize, CV_8UC1); - NppStreamHandler h(StreamAccessor::getStream(stream)); - nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr(), static_cast(src.step), sz, buf.ptr(), dst.ptr(), dst.ptr() + 1) ); syncOutput(dst, _dst, stream); From f4e5d777e856f751c4318f0c633dcce37cfa66f2 Mon Sep 17 00:00:00 2001 From: Tomoaki Teshima Date: Fri, 16 Mar 2018 16:48:03 +0900 Subject: [PATCH 2/2] follow histogram --- modules/cudaarithm/src/reductions.cpp | 8 ++++++-- modules/cudaimgproc/src/histogram.cpp | 2 +- modules/cudaimgproc/test/test_histogram.cpp | 17 +++++++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/modules/cudaarithm/src/reductions.cpp b/modules/cudaarithm/src/reductions.cpp index 1adf05fe1a..ce1bc232cf 100644 --- a/modules/cudaarithm/src/reductions.cpp +++ b/modules/cudaarithm/src/reductions.cpp @@ -137,11 +137,12 @@ void cv::cuda::meanStdDev(InputArray _src, OutputArray _dst, Stream& stream) if (!deviceSupports(FEATURE_SET_COMPUTE_13)) CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility"); - const GpuMat src = getInputMat(_src, stream); + GpuMat src = getInputMat(_src, stream); CV_Assert( src.type() == CV_8UC1 ); - GpuMat dst = getOutputMat(_dst, 1, 2, CV_64FC1, stream); + _dst.create(1, 2, CV_64FC1); + GpuMat dst = _dst.getGpuMat(); NppiSize sz; sz.width = src.cols; @@ -157,6 +158,9 @@ void cv::cuda::meanStdDev(InputArray _src, OutputArray _dst, Stream& stream) BufferPool pool(stream); GpuMat buf = pool.getBuffer(1, bufSize, CV_8UC1); + // detail: https://github.com/opencv/opencv/issues/11063 + //NppStreamHandler h(StreamAccessor::getStream(stream)); + nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr(), static_cast(src.step), sz, buf.ptr(), dst.ptr(), dst.ptr() + 1) ); syncOutput(dst, _dst, stream); diff --git a/modules/cudaimgproc/src/histogram.cpp b/modules/cudaimgproc/src/histogram.cpp index fce5057590..6e219b641b 100644 --- a/modules/cudaimgproc/src/histogram.cpp +++ b/modules/cudaimgproc/src/histogram.cpp @@ -107,7 +107,7 @@ namespace hist void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, Stream& _stream) { - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, _stream); CV_Assert( src.type() == CV_8UC1 ); diff --git a/modules/cudaimgproc/test/test_histogram.cpp b/modules/cudaimgproc/test/test_histogram.cpp index 95598359c3..eb084609e9 100644 --- a/modules/cudaimgproc/test/test_histogram.cpp +++ b/modules/cudaimgproc/test/test_histogram.cpp @@ -194,6 +194,23 @@ PARAM_TEST_CASE(EqualizeHist, cv::cuda::DeviceInfo, cv::Size) } }; +CUDA_TEST_P(EqualizeHist, Async) +{ + cv::Mat src = randomMat(size, CV_8UC1); + + cv::cuda::Stream stream; + + cv::cuda::GpuMat dst; + cv::cuda::equalizeHist(loadMat(src), dst, stream); + + stream.waitForCompletion(); + + cv::Mat dst_gold; + cv::equalizeHist(src, dst_gold); + + EXPECT_MAT_NEAR(dst_gold, dst, 3.0); +} + CUDA_TEST_P(EqualizeHist, Accuracy) { cv::Mat src = randomMat(size, CV_8UC1);