diff --git a/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp b/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp
index dc876b744b..25a324a00f 100644
--- a/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp
+++ b/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp
@@ -201,6 +201,15 @@ CV_EXPORTS void alphaComp(InputArray img1, InputArray img2, OutputArray dst, int
  */
 CV_EXPORTS void calcHist(InputArray src, OutputArray hist, Stream& stream = Stream::Null());
 
+/** @brief Calculates a histogram for a one-channel 8-bit image, confined to the given mask.
+
+@param src Source image with CV_8UC1 type.
+@param hist Destination histogram with one row, 256 columns, and the CV_32SC1 type.
+@param mask A mask image of the same size as src and of type CV_8UC1.
+@param stream Stream for the asynchronous version.
+ */
+CV_EXPORTS void calcHist(InputArray src, InputArray mask, OutputArray hist, Stream& stream = Stream::Null());
+
 /** @brief Equalizes the histogram of a grayscale image.
 
 @param src Source image with CV_8UC1 type.
diff --git a/modules/cudaimgproc/src/cuda/hist.cu b/modules/cudaimgproc/src/cuda/hist.cu
index ba9290c190..be13091f12 100644
--- a/modules/cudaimgproc/src/cuda/hist.cu
+++ b/modules/cudaimgproc/src/cuda/hist.cu
@@ -105,6 +105,72 @@ namespace hist
         if (stream == 0)
             cudaSafeCall( cudaDeviceSynchronize() );
     }
+
+    __global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t srcStep, const uchar* mask, size_t maskStep, int* hist)
+    {
+        __shared__ int shist[256];
+
+        const int y = blockIdx.x * blockDim.y + threadIdx.y;
+        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
+
+        shist[tid] = 0;
+        __syncthreads();
+
+        if (y < rows)
+        {
+            const unsigned int* rowPtr = (const unsigned int*) (src + y * srcStep);
+            const unsigned int* maskRowPtr = (const unsigned int*) (mask + y * maskStep);
+
+            const int cols_4 = cols / 4;
+            for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
+            {
+                unsigned int data = rowPtr[x];
+                unsigned int m = maskRowPtr[x];
+
+                if ((m >> 0) & 0xFFU)
+                    Emulation::smem::atomicAdd(&shist[(data >> 0) & 0xFFU], 1);
+
+                if ((m >> 8) & 0xFFU)
+                    Emulation::smem::atomicAdd(&shist[(data >> 8) & 0xFFU], 1);
+
+                if ((m >> 16) & 0xFFU)
+                    Emulation::smem::atomicAdd(&shist[(data >> 16) & 0xFFU], 1);
+
+                if ((m >> 24) & 0xFFU)
+                    Emulation::smem::atomicAdd(&shist[(data >> 24) & 0xFFU], 1);
+            }
+
+            if (cols % 4 != 0 && threadIdx.x == 0)
+            {
+                for (int x = cols_4 * 4; x < cols; ++x)
+                {
+                    unsigned int data = ((const uchar*)rowPtr)[x];
+                    unsigned int m = ((const uchar*)maskRowPtr)[x];
+
+                    if (m)
+                        Emulation::smem::atomicAdd(&shist[data], 1);
+                }
+            }
+        }
+
+        __syncthreads();
+
+        const int histVal = shist[tid];
+        if (histVal > 0)
+            ::atomicAdd(hist + tid, histVal);
+    }
+
+    void histogram256(PtrStepSzb src, PtrStepSzb mask, int* hist, cudaStream_t stream)
+    {
+        const dim3 block(32, 8);
+        const dim3 grid(divUp(src.rows, block.y));
+
+        histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, mask.data, mask.step, hist);
+        cudaSafeCall( cudaGetLastError() );
+
+        if (stream == 0)
+            cudaSafeCall( cudaDeviceSynchronize() );
+    }
 }
 
 /////////////////////////////////////////////////////////////////////////
diff --git a/modules/cudaimgproc/src/histogram.cpp b/modules/cudaimgproc/src/histogram.cpp
index 59aa83343a..fce5057590 100644
--- a/modules/cudaimgproc/src/histogram.cpp
+++ b/modules/cudaimgproc/src/histogram.cpp
@@ -69,20 +69,32 @@ void cv::cuda::histRange(InputArray, GpuMat*, const GpuMat*, Stream&) { throw_no
 namespace hist
 {
     void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream);
+    void histogram256(PtrStepSzb src, PtrStepSzb mask, int* hist, cudaStream_t stream);
 }
 
 void cv::cuda::calcHist(InputArray _src, OutputArray _hist, Stream& stream)
+{
+    calcHist(_src, cv::cuda::GpuMat(), _hist, stream);
+}
+
+void cv::cuda::calcHist(InputArray _src, InputArray _mask, OutputArray _hist, Stream& stream)
 {
     GpuMat src = _src.getGpuMat();
+    GpuMat mask = _mask.getGpuMat();
 
     CV_Assert( src.type() == CV_8UC1 );
+    CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
+    CV_Assert( mask.empty() || mask.size() == src.size() );
 
     _hist.create(1, 256, CV_32SC1);
    GpuMat hist = _hist.getGpuMat();
 
     hist.setTo(Scalar::all(0), stream);
 
-    hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
+    if (mask.empty())
+        hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
+    else
+        hist::histogram256(src, mask, hist.ptr<int>(), StreamAccessor::getStream(stream));
 }
 
 ////////////////////////////////////////////////////////////////////////
diff --git a/modules/cudaimgproc/test/test_histogram.cpp b/modules/cudaimgproc/test/test_histogram.cpp
index 3d3217375e..7fcde85206 100644
--- a/modules/cudaimgproc/test/test_histogram.cpp
+++ b/modules/cudaimgproc/test/test_histogram.cpp
@@ -136,6 +136,49 @@ INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, CalcHist, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES));
 
+PARAM_TEST_CASE(CalcHistWithMask, cv::cuda::DeviceInfo, cv::Size)
+{
+    cv::cuda::DeviceInfo devInfo;
+
+    cv::Size size;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        size = GET_PARAM(1);
+
+        cv::cuda::setDevice(devInfo.deviceID());
+    }
+};
+
+CUDA_TEST_P(CalcHistWithMask, Accuracy)
+{
+    cv::Mat src = randomMat(size, CV_8UC1);
+    cv::Mat mask = randomMat(size, CV_8UC1);
+    cv::Mat(mask, cv::Rect(0, 0, size.width / 2, size.height / 2)).setTo(0);
+
+    cv::cuda::GpuMat hist;
+    cv::cuda::calcHist(loadMat(src), loadMat(mask), hist);
+
+    cv::Mat hist_gold;
+
+    const int hbins = 256;
+    const float hranges[] = {0.0f, 256.0f};
+    const int histSize[] = {hbins};
+    const float* ranges[] = {hranges};
+    const int channels[] = {0};
+
+    cv::calcHist(&src, 1, channels, mask, hist_gold, 1, histSize, ranges);
+    hist_gold = hist_gold.reshape(1, 1);
+    hist_gold.convertTo(hist_gold, CV_32S);
+
+    EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, CalcHistWithMask, testing::Combine(
+    ALL_DEVICES,
+    DIFFERENT_SIZES));
+
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // EqualizeHist
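
A minimal usage sketch of the new overload (not part of the patch; it assumes the patch is applied, OpenCV is built with CUDA, and "input.png" is a placeholder path). Pixels whose mask byte is zero are excluded from the counts, matching the convention cv::calcHist follows on the CPU:

#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/cudaimgproc.hpp>

int main()
{
    // Load a grayscale (CV_8UC1) image; the overload asserts on other types.
    cv::Mat img = cv::imread("input.png", cv::IMREAD_GRAYSCALE);

    // Build a mask that selects only the left half of the image.
    cv::Mat roi = cv::Mat::zeros(img.size(), CV_8UC1);
    roi(cv::Rect(0, 0, img.cols / 2, img.rows)).setTo(255);

    // Upload, compute the masked histogram (1x256, CV_32SC1), download.
    cv::cuda::GpuMat d_img(img), d_mask(roi), d_hist;
    cv::cuda::calcHist(d_img, d_mask, d_hist);

    cv::Mat hist;
    d_hist.download(hist);
    return 0;
}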