Added 32FC1 type support and mask to cuda::meanStdDev implementation.

pull/3191/head
Alexander Smorkalov 3 years ago
parent d4719b28f6
commit df101fd7e3
  1. 30
      modules/cudaarithm/include/opencv2/cudaarithm.hpp
  2. 93
      modules/cudaarithm/src/reductions.cpp
  3. 68
      modules/cudaarithm/test/test_reductions.cpp

@ -685,21 +685,39 @@ CV_EXPORTS_W void reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp,
/** @brief Computes a mean value and a standard deviation of matrix elements.

@param src Source matrix. CV_8UC1 and CV_32FC1 matrices are supported for now.
@param dst Target GpuMat with size 1x2 and type CV_64FC1. The first value is the mean, the second the stddev.
@param mask Operation mask.
@param stream Stream for the asynchronous version.

@sa meanStdDev
*/
CV_EXPORTS_W void meanStdDev(InputArray src, OutputArray dst, InputArray mask, Stream& stream = Stream::Null());
/** @overload
@param mtx Source matrix. CV_8UC1 and CV_32FC1 matrices are supported for now.
@param dst Target GpuMat with size 1x2 and type CV_64FC1. The first value is the mean, the second the stddev.
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS_W void meanStdDev(InputArray mtx, OutputArray dst, Stream& stream = Stream::Null());
/** @overload
@param src Source matrix. CV_8UC1 and CV_32FC1 matrices are supported for now.
@param mean Mean value.
@param stddev Standard deviation value.
@param mask Operation mask.
*/
CV_EXPORTS_W void meanStdDev(InputArray src, CV_OUT Scalar& mean, CV_OUT Scalar& stddev, InputArray mask);
/** @overload
@param mtx Source matrix. CV_8UC1 and CV_32FC1 matrices are supported for now.
@param mean Mean value.
@param stddev Standard deviation value.
*/
CV_EXPORTS_W void meanStdDev(InputArray mtx, CV_OUT Scalar& mean, CV_OUT Scalar& stddev);
/** @brief Computes a standard deviation of integral images.
@param src Source image. Only the CV_32SC1 type is supported.
@param sqr Squared source image. Only the CV_32FC1 type is supported.
@param dst Destination image with the same type and size as src.
@param rect Rectangular window.
@param stream Stream for the asynchronous version.
*/

@ -132,45 +132,71 @@ double cv::cuda::norm(InputArray _src, int normType, InputArray _mask)
////////////////////////////////////////////////////////////////////////
// meanStdDev
// Computes mean and standard deviation of a single-channel matrix on the GPU
// via NPP; the result is a 1x2 CV_64FC1 row {mean, stddev} written to dst.
// CV_8UC1 is always supported; CV_32FC1 additionally on CUDA > 4.2.
void cv::cuda::meanStdDev(InputArray src, OutputArray dst, Stream& stream)
{
    if (!deviceSupports(FEATURE_SET_COMPUTE_13))
        CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capability");

    const GpuMat gsrc = getInputMat(src, stream);

#if (CUDA_VERSION <= 4020)
    CV_Assert( gsrc.type() == CV_8UC1 );
#else
    CV_Assert( (gsrc.type() == CV_8UC1) || (gsrc.type() == CV_32FC1) );
#endif

    GpuMat gdst = getOutputMat(dst, 1, 2, CV_64FC1, stream);

    NppiSize sz;
    sz.width  = gsrc.cols;
    sz.height = gsrc.rows;

    int bufSize;
#if (CUDA_VERSION <= 4020)
    nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
#else
    if (gsrc.type() == CV_8UC1)
        nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1R(sz, &bufSize) );
    else
        nppSafeCall( nppiMeanStdDevGetBufferHostSize_32f_C1R(sz, &bufSize) );
#endif

    BufferPool pool(stream);
    // NPP reports the scratch-buffer size in BYTES, so allocate the buffer as
    // CV_8UC1 regardless of the source type (using gsrc.type() here would
    // over-allocate 4x for CV_32FC1 sources).
    GpuMat buf = pool.getBuffer(1, bufSize, CV_8UC1);

    // detail: https://github.com/opencv/opencv/issues/11063
    //NppStreamHandler h(StreamAccessor::getStream(stream));

    if (gsrc.type() == CV_8UC1)
        nppSafeCall( nppiMean_StdDev_8u_C1R(gsrc.ptr<Npp8u>(), static_cast<int>(gsrc.step), sz,
                                            buf.ptr<Npp8u>(), gdst.ptr<Npp64f>(), gdst.ptr<Npp64f>() + 1) );
    else
        nppSafeCall( nppiMean_StdDev_32f_C1R(gsrc.ptr<Npp32f>(), static_cast<int>(gsrc.step), sz,
                                             buf.ptr<Npp8u>(), gdst.ptr<Npp64f>(), gdst.ptr<Npp64f>() + 1) );

    syncOutput(gdst, dst, stream);
}

// Synchronous convenience wrapper: runs the asynchronous version on the null
// stream, waits for completion, and unpacks the 1x2 result into Scalars.
void cv::cuda::meanStdDev(InputArray src, Scalar& mean, Scalar& stddev)
{
    Stream& stream = Stream::Null();

    HostMem dst;
    meanStdDev(src, dst, stream);

    stream.waitForCompletion();

    double vals[2];
    dst.createMatHeader().copyTo(Mat(1, 2, CV_64FC1, &vals[0]));

    mean = Scalar(vals[0]);
    stddev = Scalar(vals[1]);
}
// Synchronous masked convenience wrapper: runs the asynchronous masked version
// on the null stream, waits, and unpacks the 1x2 {mean, stddev} result.
// NOTE(review): the middle of this function was elided in the diff view; the
// vals[]/copyTo unpacking is reconstructed from the identical unmasked wrapper.
void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, InputArray _mask)
{
    Stream& stream = Stream::Null();

    HostMem dst;
    meanStdDev(_src, dst, _mask, stream);

    stream.waitForCompletion();

    double vals[2];
    dst.createMatHeader().copyTo(Mat(1, 2, CV_64FC1, &vals[0]));

    mean = Scalar(vals[0]);
    stddev = Scalar(vals[1]);
}
// Masked variant: computes mean and standard deviation over the pixels where
// mask is non-zero, writing a 1x2 CV_64FC1 row {mean, stddev} to dst.
// CV_8UC1 is always supported; CV_32FC1 additionally on CUDA > 4.2.
void cv::cuda::meanStdDev(InputArray src, OutputArray dst, InputArray mask, Stream& stream)
{
    if (!deviceSupports(FEATURE_SET_COMPUTE_13))
        CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capability");

    const GpuMat gsrc = getInputMat(src, stream);
    const GpuMat gmask = getInputMat(mask, stream);

#if (CUDA_VERSION <= 4020)
    CV_Assert( gsrc.type() == CV_8UC1 );
#else
    CV_Assert( (gsrc.type() == CV_8UC1) || (gsrc.type() == CV_32FC1) );
#endif
    // The NPP masked routines take an Npp8u mask of the same geometry as src;
    // validate it up front instead of letting NPP fail obscurely.
    CV_Assert( gmask.type() == CV_8UC1 && gmask.size() == gsrc.size() );

    GpuMat gdst = getOutputMat(dst, 1, 2, CV_64FC1, stream);

    NppiSize sz;
    sz.width  = gsrc.cols;
    sz.height = gsrc.rows;

    int bufSize;
#if (CUDA_VERSION <= 4020)
    nppSafeCall( nppiMeanStdDev8uC1MRGetBufferHostSize(sz, &bufSize) );
#else
    if (gsrc.type() == CV_8UC1)
        nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1MR(sz, &bufSize) );
    else
        nppSafeCall( nppiMeanStdDevGetBufferHostSize_32f_C1MR(sz, &bufSize) );
#endif

    BufferPool pool(stream);
    // NPP reports the scratch-buffer size in BYTES, so allocate the buffer as
    // CV_8UC1 regardless of the source type (using gsrc.type() here would
    // over-allocate 4x for CV_32FC1 sources).
    GpuMat buf = pool.getBuffer(1, bufSize, CV_8UC1);

    if (gsrc.type() == CV_8UC1)
        nppSafeCall( nppiMean_StdDev_8u_C1MR(gsrc.ptr<Npp8u>(), static_cast<int>(gsrc.step),
                                             gmask.ptr<Npp8u>(), static_cast<int>(gmask.step),
                                             sz, buf.ptr<Npp8u>(), gdst.ptr<Npp64f>(), gdst.ptr<Npp64f>() + 1) );
    else
        nppSafeCall( nppiMean_StdDev_32f_C1MR(gsrc.ptr<Npp32f>(), static_cast<int>(gsrc.step),
                                              gmask.ptr<Npp8u>(), static_cast<int>(gmask.step),
                                              sz, buf.ptr<Npp8u>(), gdst.ptr<Npp64f>(), gdst.ptr<Npp64f>() + 1) );

    syncOutput(gdst, dst, stream);
}
//////////////////////////////////////////////////////////////////////////////
// rectStdDev

@ -967,17 +967,19 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Normalize, testing::Combine(
////////////////////////////////////////////////////////////////////////////////
// MeanStdDev
PARAM_TEST_CASE(MeanStdDev, cv::cuda::DeviceInfo, cv::Size, UseRoi)
PARAM_TEST_CASE(MeanStdDev, cv::cuda::DeviceInfo, cv::Size, UseRoi, MatDepth)
{
cv::cuda::DeviceInfo devInfo;
cv::Size size;
bool useRoi;
int MatDepth;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
useRoi = GET_PARAM(2);
MatDepth = GET_PARAM(3);
cv::cuda::setDevice(devInfo.deviceID());
}
@ -985,7 +987,7 @@ PARAM_TEST_CASE(MeanStdDev, cv::cuda::DeviceInfo, cv::Size, UseRoi)
CUDA_TEST_P(MeanStdDev, Accuracy)
{
cv::Mat src = randomMat(size, CV_8UC1);
cv::Mat src = randomMat(size, MatDepth);
if (!supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_13))
{
@ -1015,9 +1017,42 @@ CUDA_TEST_P(MeanStdDev, Accuracy)
}
}
// Masked meanStdDev accuracy test: compares the CUDA result against the CPU
// cv::meanStdDev reference with the same mask.
CUDA_TEST_P(MeanStdDev, MaskedAccuracy)
{
    cv::Mat src = randomMat(size, MatDepth);
    cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);

    if (!supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_13))
    {
        try
        {
            cv::Scalar mean;
            cv::Scalar stddev;
            // Exercise the MASKED overload here too — the original called the
            // unmasked one, so the unsupported-device path tested a different
            // entry point than the one this test is about.
            cv::cuda::meanStdDev(loadMat(src, useRoi), mean, stddev, loadMat(mask));
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(cv::Error::StsNotImplemented, e.code);
        }
    }
    else
    {
        cv::Scalar mean;
        cv::Scalar stddev;
        cv::cuda::meanStdDev(loadMat(src, useRoi), mean, stddev, loadMat(mask));

        cv::Scalar mean_gold;
        cv::Scalar stddev_gold;
        cv::meanStdDev(src, mean_gold, stddev_gold, mask);

        EXPECT_SCALAR_NEAR(mean_gold, mean, 1e-5);
        EXPECT_SCALAR_NEAR(stddev_gold, stddev, 1e-5);
    }
}
CUDA_TEST_P(MeanStdDev, Async)
{
cv::Mat src = randomMat(size, CV_8UC1);
cv::Mat src = randomMat(size, MatDepth);
cv::cuda::Stream stream;
@ -1037,10 +1072,35 @@ CUDA_TEST_P(MeanStdDev, Async)
EXPECT_SCALAR_NEAR(stddev_gold, cv::Scalar(vals[1]), 1e-5);
}
// Masked asynchronous meanStdDev: the 1x2 {mean, stddev} result is delivered
// into page-locked HostMem and compared against the CPU reference after the
// stream completes.
CUDA_TEST_P(MeanStdDev, MaskedAsync)
{
    cv::Mat src = randomMat(size, MatDepth);
    cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);

    cv::cuda::Stream stream;

    cv::cuda::HostMem result;
    cv::cuda::meanStdDev(loadMat(src, useRoi), result, loadMat(mask), stream);
    stream.waitForCompletion();

    double meanStddev[2];
    result.createMatHeader().copyTo(cv::Mat(1, 2, CV_64FC1, &meanStddev[0]));

    cv::Scalar expectedMean;
    cv::Scalar expectedStddev;
    cv::meanStdDev(src, expectedMean, expectedStddev, mask);

    EXPECT_SCALAR_NEAR(expectedMean, cv::Scalar(meanStddev[0]), 1e-5);
    EXPECT_SCALAR_NEAR(expectedStddev, cv::Scalar(meanStddev[1]), 1e-5);
}
// Instantiate MeanStdDev over all devices, sizes, ROI modes, and both
// supported depths. The stray pre-change line `WHOLE_SUBMAT));` left by the
// diff rendering has been removed — it made the statement syntactically invalid.
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, MeanStdDev, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    WHOLE_SUBMAT,
    testing::Values(MatDepth(CV_8U), MatDepth(CV_32F))
));
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Integral

Loading…
Cancel
Save