Merge pull request #3475 from cudawarped:cuda_fix_unaligned_hist

Modified histogram kernels to work with non-aligned data
Merged by Alexander Smorkalov (committed via GitHub), commit 4f66f8677b
Changed files:
  1. modules/cudaimgproc/src/cuda/hist.cu (160 lines changed)
  2. modules/cudaimgproc/src/histogram.cpp (16 lines changed)
  3. modules/cudaimgproc/test/test_histogram.cpp (76 lines changed)
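
For context, the change targets calls like the one sketched below, where the input GpuMat is a view into a larger allocation whose x offset leaves each row starting off a 4-byte boundary; the previous kernels cast such row pointers straight to unsigned int*. This is a minimal illustration only (the image size, ROI rectangle, and main() scaffolding are invented for the example, not taken from the PR):

    #include <opencv2/core.hpp>
    #include <opencv2/cudaimgproc.hpp>

    int main()
    {
        cv::Mat img(480, 640, CV_8UC1, cv::Scalar(0));   // host image; contents are irrelevant here
        cv::cuda::GpuMat d_img;
        d_img.upload(img);

        // ROI starting at x = 1: each row pointer is offset by one byte from the
        // parent allocation, so it is no longer 4-byte aligned.
        cv::cuda::GpuMat d_roi = d_img(cv::Rect(1, 0, 320, 480));

        cv::cuda::GpuMat d_hist;
        cv::cuda::calcHist(d_roi, d_hist);               // exercises the non-aligned kernel path
        return 0;
    }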

modules/cudaimgproc/src/cuda/hist.cu

@@ -52,38 +52,41 @@ using namespace cv::cuda::device;
 namespace hist
 {
-    __global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t step, int* hist)
+    template<bool fourByteAligned>
+    __global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t step, int* hist, const int offsetX = 0)
     {
         __shared__ int shist[256];
         const int y = blockIdx.x * blockDim.y + threadIdx.y;
         const int tid = threadIdx.y * blockDim.x + threadIdx.x;
+        const int alignedOffset = fourByteAligned ? 0 : 4 - offsetX;
         shist[tid] = 0;
         __syncthreads();
-        if (y < rows)
-        {
-            const unsigned int* rowPtr = (const unsigned int*) (src + y * step);
-            const int cols_4 = cols / 4;
-            for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
-            {
-                unsigned int data = rowPtr[x];
-                Emulation::smem::atomicAdd(&shist[(data >> 0) & 0xFFU], 1);
-                Emulation::smem::atomicAdd(&shist[(data >> 8) & 0xFFU], 1);
+        if (y < rows) {
+            const uchar* rowPtr = &src[y * step];
+            // load uncoalesced head
+            if (!fourByteAligned && threadIdx.x == 0) {
+                for (int x = 0; x < min(alignedOffset, cols); x++)
+                    Emulation::smem::atomicAdd(&shist[static_cast<int>(rowPtr[x])], 1);
+            }
+            // coalesced loads
+            const unsigned int* rowPtrIntAligned = (const unsigned int*)(fourByteAligned ? &src[y * step] : &src[alignedOffset + y * step]);
+            const int cols_4 = fourByteAligned ? cols / 4 : (cols - alignedOffset) / 4;
+            for (int x = threadIdx.x; x < cols_4; x += blockDim.x) {
+                const unsigned int data = rowPtrIntAligned[x];
+                Emulation::smem::atomicAdd(&shist[(data >> 0) & 0xFFU], 1);
+                Emulation::smem::atomicAdd(&shist[(data >> 8) & 0xFFU], 1);
                 Emulation::smem::atomicAdd(&shist[(data >> 16) & 0xFFU], 1);
                 Emulation::smem::atomicAdd(&shist[(data >> 24) & 0xFFU], 1);
             }
-            if (cols % 4 != 0 && threadIdx.x == 0)
-            {
-                for (int x = cols_4 * 4; x < cols; ++x)
-                {
-                    unsigned int data = ((const uchar*)rowPtr)[x];
-                    Emulation::smem::atomicAdd(&shist[data], 1);
-                }
+            // load uncoalesced tail
+            if (threadIdx.x == 0) {
+                const int iTailStart = fourByteAligned ? cols_4 * 4 : cols_4 * 4 + alignedOffset;
+                for (int x = iTailStart; x < cols; x++)
+                    Emulation::smem::atomicAdd(&shist[static_cast<int>(rowPtr[x])], 1);
             }
         }

@@ -94,61 +97,70 @@ namespace hist
         ::atomicAdd(hist + tid, histVal);
     }
-    void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream)
+    void histogram256(PtrStepSzb src, int* hist, const int offsetX, cudaStream_t stream)
     {
         const dim3 block(32, 8);
         const dim3 grid(divUp(src.rows, block.y));
-        histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
+        if(offsetX)
+            histogram256Kernel<false><<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist, offsetX);
+        else
+            histogram256Kernel<true><<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist, offsetX);
         cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
             cudaSafeCall( cudaDeviceSynchronize() );
     }
-    __global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t srcStep, const uchar* mask, size_t maskStep, int* hist)
+    template<bool fourByteAligned>
+    __global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t srcStep, const uchar* mask, size_t maskStep, int* hist, const int offsetX = 0)
     {
         __shared__ int shist[256];
         const int y = blockIdx.x * blockDim.y + threadIdx.y;
         const int tid = threadIdx.y * blockDim.x + threadIdx.x;
+        const int alignedOffset = fourByteAligned ? 0 : 4 - offsetX;
         shist[tid] = 0;
         __syncthreads();
         if (y < rows)
         {
-            const unsigned int* rowPtr = (const unsigned int*) (src + y * srcStep);
-            const unsigned int* maskRowPtr = (const unsigned int*) (mask + y * maskStep);
-            const int cols_4 = cols / 4;
-            for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
-            {
-                unsigned int data = rowPtr[x];
-                unsigned int m = maskRowPtr[x];
+            const uchar* rowPtr = &src[y * srcStep];
+            const uchar* maskRowPtr = &mask[y * maskStep];
+            // load uncoalesced head
+            if (!fourByteAligned && threadIdx.x == 0) {
+                for (int x = 0; x < min(alignedOffset, cols); x++) {
+                    if (maskRowPtr[x])
+                        Emulation::smem::atomicAdd(&shist[rowPtr[x]], 1);
+                }
+            }
+            // coalesced loads
+            const unsigned int* rowPtrIntAligned = (const unsigned int*)(fourByteAligned ? &src[y * srcStep] : &src[alignedOffset + y * maskStep]);
+            const unsigned int* maskRowPtrIntAligned = (const unsigned int*)(fourByteAligned ? &mask[y * maskStep] : &mask[alignedOffset + y * maskStep]);
+            const int cols_4 = fourByteAligned ? cols / 4 : (cols - alignedOffset) / 4;
+            for (int x = threadIdx.x; x < cols_4; x += blockDim.x) {
+                const unsigned int data = rowPtrIntAligned[x];
+                const unsigned int m = maskRowPtrIntAligned[x];
                 if ((m >> 0) & 0xFFU)
                     Emulation::smem::atomicAdd(&shist[(data >> 0) & 0xFFU], 1);
                 if ((m >> 8) & 0xFFU)
                     Emulation::smem::atomicAdd(&shist[(data >> 8) & 0xFFU], 1);
                 if ((m >> 16) & 0xFFU)
                     Emulation::smem::atomicAdd(&shist[(data >> 16) & 0xFFU], 1);
                 if ((m >> 24) & 0xFFU)
                     Emulation::smem::atomicAdd(&shist[(data >> 24) & 0xFFU], 1);
             }
-            if (cols % 4 != 0 && threadIdx.x == 0)
-            {
-                for (int x = cols_4 * 4; x < cols; ++x)
-                {
-                    unsigned int data = ((const uchar*)rowPtr)[x];
-                    unsigned int m = ((const uchar*)maskRowPtr)[x];
-                    if (m)
-                        Emulation::smem::atomicAdd(&shist[data], 1);
-                }
+            // load uncoalesced tail
+            if (threadIdx.x == 0) {
+                const int iTailStart = fourByteAligned ? cols_4 * 4 : cols_4 * 4 + alignedOffset;
+                for (int x = iTailStart; x < cols; x++) {
+                    if (maskRowPtr[x])
+                        Emulation::smem::atomicAdd(&shist[static_cast<int>(rowPtr[x])], 1);
                 }
             }
         }

@@ -160,12 +172,15 @@ namespace hist
         ::atomicAdd(hist + tid, histVal);
     }
-    void histogram256(PtrStepSzb src, PtrStepSzb mask, int* hist, cudaStream_t stream)
+    void histogram256(PtrStepSzb src, PtrStepSzb mask, int* hist, const int offsetX, cudaStream_t stream)
     {
         const dim3 block(32, 8);
         const dim3 grid(divUp(src.rows, block.y));
-        histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, mask.data, mask.step, hist);
+        if(offsetX)
+            histogram256Kernel<false><<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, mask.data, mask.step, hist, offsetX);
+        else
+            histogram256Kernel<true><<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, mask.data, mask.step, hist, offsetX);
         cudaSafeCall( cudaGetLastError() );
         if (stream == 0)

@@ -186,42 +201,44 @@ namespace hist
         }
     }
-    __global__ void histEven8u(const uchar* src, const size_t step, const int rows, const int cols,
-                               int* hist, const int binCount, const int binSize, const int lowerLevel, const int upperLevel)
+    template<bool fourByteAligned>
+    __global__ void histEven8u(const uchar* src, const size_t step, const int rows, const int cols, int* hist, const int binCount, const int binSize,
+        const int lowerLevel, const int upperLevel, const int offsetX)
     {
         extern __shared__ int shist[];
         const int y = blockIdx.x * blockDim.y + threadIdx.y;
         const int tid = threadIdx.y * blockDim.x + threadIdx.x;
+        const int alignedOffset = fourByteAligned ? 0 : 4 - offsetX;
         if (tid < binCount)
             shist[tid] = 0;
         __syncthreads();
         if (y < rows)
         {
-            const uchar* rowPtr = src + y * step;
-            const uint* rowPtr4 = (uint*) rowPtr;
-            const int cols_4 = cols / 4;
-            for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
-            {
-                const uint data = rowPtr4[x];
-                histEvenInc(shist, (data >> 0) & 0xFFU, binSize, lowerLevel, upperLevel);
-                histEvenInc(shist, (data >> 8) & 0xFFU, binSize, lowerLevel, upperLevel);
+            const uchar* rowPtr = &src[y * step];
+            // load uncoalesced head
+            if (!fourByteAligned && threadIdx.x == 0) {
+                for (int x = 0; x < min(alignedOffset, cols); x++)
+                    histEvenInc(shist, rowPtr[x], binSize, lowerLevel, upperLevel);
+            }
+            // coalesced loads
+            const unsigned int* rowPtrIntAligned = (const unsigned int*)(fourByteAligned ? &src[y * step] : &src[alignedOffset + y * step]);
+            const int cols_4 = fourByteAligned ? cols / 4 : (cols - alignedOffset) / 4;
+            for (int x = threadIdx.x; x < cols_4; x += blockDim.x) {
+                const unsigned int data = rowPtrIntAligned[x];
+                histEvenInc(shist, (data >> 0) & 0xFFU, binSize, lowerLevel, upperLevel);
+                histEvenInc(shist, (data >> 8) & 0xFFU, binSize, lowerLevel, upperLevel);
                 histEvenInc(shist, (data >> 16) & 0xFFU, binSize, lowerLevel, upperLevel);
                 histEvenInc(shist, (data >> 24) & 0xFFU, binSize, lowerLevel, upperLevel);
             }
-            if (cols % 4 != 0 && threadIdx.x == 0)
-            {
-                for (int x = cols_4 * 4; x < cols; ++x)
-                {
-                    const uchar data = rowPtr[x];
-                    histEvenInc(shist, data, binSize, lowerLevel, upperLevel);
-                }
+            // load uncoalesced tail
+            if (threadIdx.x == 0) {
+                const int iTailStart = fourByteAligned ? cols_4 * 4 : cols_4 * 4 + alignedOffset;
+                for (int x = iTailStart; x < cols; x++)
+                    histEvenInc(shist, rowPtr[x], binSize, lowerLevel, upperLevel);
             }
         }

@@ -236,7 +253,7 @@ namespace hist
         }
     }
-    void histEven8u(PtrStepSzb src, int* hist, int binCount, int lowerLevel, int upperLevel, cudaStream_t stream)
+    void histEven8u(PtrStepSzb src, int* hist, int binCount, int lowerLevel, int upperLevel, const int offsetX, cudaStream_t stream)
     {
         const dim3 block(32, 8);
         const dim3 grid(divUp(src.rows, block.y));

@@ -245,7 +262,10 @@ namespace hist
         const size_t smem_size = binCount * sizeof(int);
-        histEven8u<<<grid, block, smem_size, stream>>>(src.data, src.step, src.rows, src.cols, hist, binCount, binSize, lowerLevel, upperLevel);
+        if(offsetX)
+            histEven8u<false><<<grid, block, smem_size, stream>>>(src.data, src.step, src.rows, src.cols, hist, binCount, binSize, lowerLevel, upperLevel, offsetX);
+        else
+            histEven8u<true><<<grid, block, smem_size, stream>>>(src.data, src.step, src.rows, src.cols, hist, binCount, binSize, lowerLevel, upperLevel, offsetX);
         cudaSafeCall( cudaGetLastError() );
         if (stream == 0)
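
To see how the new head/body/tail loops in hist.cu partition a row when the data is not 4-byte aligned: for a misalignment offsetX in {1, 2, 3}, alignedOffset = 4 - offsetX bytes are handled as an uncoalesced head, (cols - alignedOffset) / 4 full words as the coalesced body, and whatever remains as an uncoalesced tail. The host-side check below only mirrors that index arithmetic; it is an illustration, not part of the PR, and checkPartition is a made-up helper name:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Reproduces the index ranges used by the fourByteAligned == false kernels
    // and asserts that every byte of the row is visited exactly once.
    static void checkPartition(int cols, int offsetX)            // offsetX in {1, 2, 3}
    {
        const int alignedOffset = 4 - offsetX;
        const int cols_4 = (cols - alignedOffset) / 4;           // whole 4-byte words in the body
        std::vector<int> hits(cols, 0);

        for (int x = 0; x < std::min(alignedOffset, cols); ++x)  // uncoalesced head
            ++hits[x];
        for (int x = 0; x < cols_4 * 4; ++x)                     // coalesced body, byte view
            ++hits[alignedOffset + x];
        for (int x = cols_4 * 4 + alignedOffset; x < cols; ++x)  // uncoalesced tail
            ++hits[x];

        for (int x = 0; x < cols; ++x)
            assert(hits[x] == 1);
    }

    int main()
    {
        for (int offsetX = 1; offsetX <= 3; ++offsetX)
            for (int cols = 1; cols <= 256; ++cols)
                checkPartition(cols, offsetX);
        return 0;
    }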

modules/cudaimgproc/src/histogram.cpp

@@ -68,8 +68,8 @@ void cv::cuda::histRange(InputArray, GpuMat*, const GpuMat*, Stream&) { throw_no
 namespace hist
 {
-    void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream);
-    void histogram256(PtrStepSzb src, PtrStepSzb mask, int* hist, cudaStream_t stream);
+    void histogram256(PtrStepSzb src, int* hist, const int offsetX, cudaStream_t stream);
+    void histogram256(PtrStepSzb src, PtrStepSzb mask, int* hist, const int offsetX, cudaStream_t stream);
 }
 void cv::cuda::calcHist(InputArray _src, OutputArray _hist, Stream& stream)

@@ -91,10 +91,12 @@ void cv::cuda::calcHist(InputArray _src, InputArray _mask, OutputArray _hist, St
     hist.setTo(Scalar::all(0), stream);
+    Point ofs; Size wholeSize;
+    src.locateROI(wholeSize, ofs);
     if (mask.empty())
-        hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
+        hist::histogram256(src, hist.ptr<int>(), ofs.x, StreamAccessor::getStream(stream));
     else
-        hist::histogram256(src, mask, hist.ptr<int>(), StreamAccessor::getStream(stream));
+        hist::histogram256(src, mask, hist.ptr<int>(), ofs.x, StreamAccessor::getStream(stream));
 }
 ////////////////////////////////////////////////////////////////////////

@@ -494,16 +496,18 @@ void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int
 namespace hist
 {
-    void histEven8u(PtrStepSzb src, int* hist, int binCount, int lowerLevel, int upperLevel, cudaStream_t stream);
+    void histEven8u(PtrStepSzb src, int* hist, int binCount, int lowerLevel, int upperLevel, const int offsetX, cudaStream_t stream);
 }
 namespace
 {
     void histEven8u(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)
     {
+        Point ofs; Size wholeSize;
+        src.locateROI(wholeSize, ofs);
         hist.create(1, histSize, CV_32S);
         cudaSafeCall( cudaMemsetAsync(hist.data, 0, histSize * sizeof(int), stream) );
-        hist::histEven8u(src, hist.ptr<int>(), histSize, lowerLevel, upperLevel, stream);
+        hist::histEven8u(src, hist.ptr<int>(), histSize, lowerLevel, upperLevel, ofs.x, stream);
     }
 }
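
On the host side, GpuMat::locateROI is what recovers the submatrix offset that gets forwarded to the kernels as offsetX. A small sketch of its behaviour (the matrix sizes and ROI here are illustrative):

    #include <opencv2/core/cuda.hpp>

    int main()
    {
        cv::cuda::GpuMat whole(32, 64, CV_8UC1);
        cv::cuda::GpuMat roi = whole(cv::Rect(3, 2, 16, 16));

        cv::Size wholeSize;
        cv::Point ofs;
        roi.locateROI(wholeSize, ofs);
        // wholeSize is the parent size, Size(64, 32); ofs is the ROI origin,
        // Point(3, 2). ofs.x == 3 is the value calcHist / histEven8u pass down
        // as offsetX in the code above.
        CV_Assert(ofs.x == 3 && ofs.y == 2);
        return 0;
    }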

modules/cudaimgproc/test/test_histogram.cpp

@@ -49,15 +49,40 @@ namespace opencv_test { namespace {
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // HistEven
-PARAM_TEST_CASE(HistEven, cv::cuda::DeviceInfo, cv::Size)
+typedef tuple<Size, int> hist_size_to_roi_offset_params_t;
+const hist_size_to_roi_offset_params_t hist_size_to_roi_offset_params[] =
+{
+    // uchar reads only
+    hist_size_to_roi_offset_params_t(Size(1,32), 0),
+    hist_size_to_roi_offset_params_t(Size(2,32), 0),
+    hist_size_to_roi_offset_params_t(Size(2,32), 1),
+    hist_size_to_roi_offset_params_t(Size(3,32), 0),
+    hist_size_to_roi_offset_params_t(Size(3,32), 1),
+    hist_size_to_roi_offset_params_t(Size(3,32), 2),
+    hist_size_to_roi_offset_params_t(Size(4,32), 0),
+    hist_size_to_roi_offset_params_t(Size(4,32), 1),
+    hist_size_to_roi_offset_params_t(Size(4,32), 2),
+    hist_size_to_roi_offset_params_t(Size(4,32), 3),
+    // uchar and int reads
+    hist_size_to_roi_offset_params_t(Size(129,32), 0),
+    hist_size_to_roi_offset_params_t(Size(129,32), 1),
+    hist_size_to_roi_offset_params_t(Size(129,32), 2),
+    hist_size_to_roi_offset_params_t(Size(129,32), 3),
+    // int reads only
+    hist_size_to_roi_offset_params_t(Size(128,32), 0)
+};
+PARAM_TEST_CASE(HistEven, cv::cuda::DeviceInfo, hist_size_to_roi_offset_params_t)
 {
     cv::cuda::DeviceInfo devInfo;
     cv::Size size;
+    int roiOffsetX;
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
+        size = get<0>(GET_PARAM(1));
+        roiOffsetX = get<1>(GET_PARAM(1));
         cv::cuda::setDevice(devInfo.deviceID());
     }

@@ -66,19 +91,21 @@ PARAM_TEST_CASE(HistEven, cv::cuda::DeviceInfo, cv::Size)
 CUDA_TEST_P(HistEven, Accuracy)
 {
     cv::Mat src = randomMat(size, CV_8UC1);
+    const Rect roi = Rect(roiOffsetX, 0, src.cols - roiOffsetX, src.rows);
     int hbins = 30;
     float hranges[] = {50.0f, 200.0f};
     cv::cuda::GpuMat hist;
-    cv::cuda::histEven(loadMat(src), hist, hbins, (int) hranges[0], (int) hranges[1]);
+    cv::cuda::GpuMat srcDevice = loadMat(src);
+    cv::cuda::histEven(srcDevice(roi), hist, hbins, (int)hranges[0], (int)hranges[1]);
     cv::Mat hist_gold;
     int histSize[] = {hbins};
     const float* ranges[] = {hranges};
     int channels[] = {0};
-    cv::calcHist(&src, 1, channels, cv::Mat(), hist_gold, 1, histSize, ranges);
+    Mat srcRoi = src(roi);
+    cv::calcHist(&srcRoi, 1, channels, cv::Mat(), hist_gold, 1, histSize, ranges);
     hist_gold = hist_gold.t();
     hist_gold.convertTo(hist_gold, CV_32S);

@@ -87,22 +114,24 @@ CUDA_TEST_P(HistEven, Accuracy)
 }
 INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, HistEven, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES));
+    ALL_DEVICES, testing::ValuesIn(hist_size_to_roi_offset_params)));
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // CalcHist
-PARAM_TEST_CASE(CalcHist, cv::cuda::DeviceInfo, cv::Size)
+PARAM_TEST_CASE(CalcHist, cv::cuda::DeviceInfo, hist_size_to_roi_offset_params_t)
 {
     cv::cuda::DeviceInfo devInfo;
     cv::Size size;
+    int roiOffsetX;
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
+        size = get<0>(GET_PARAM(1));
+        roiOffsetX = get<1>(GET_PARAM(1));
         cv::cuda::setDevice(devInfo.deviceID());
     }

@@ -111,9 +140,10 @@ PARAM_TEST_CASE(CalcHist, cv::cuda::DeviceInfo, cv::Size)
 CUDA_TEST_P(CalcHist, Accuracy)
 {
     cv::Mat src = randomMat(size, CV_8UC1);
+    const Rect roi = Rect(roiOffsetX, 0, src.cols - roiOffsetX, src.rows);
     cv::cuda::GpuMat hist;
-    cv::cuda::calcHist(loadMat(src), hist);
+    GpuMat srcDevice = loadMat(src);
+    cv::cuda::calcHist(srcDevice(roi), hist);
     cv::Mat hist_gold;

@@ -123,7 +153,8 @@ CUDA_TEST_P(CalcHist, Accuracy)
     const float* ranges[] = {hranges};
     const int channels[] = {0};
-    cv::calcHist(&src, 1, channels, cv::Mat(), hist_gold, 1, histSize, ranges);
+    const Mat srcRoi = src(roi);
+    cv::calcHist(&srcRoi, 1, channels, cv::Mat(), hist_gold, 1, histSize, ranges);
     hist_gold = hist_gold.reshape(1, 1);
     hist_gold.convertTo(hist_gold, CV_32S);

@@ -131,19 +162,21 @@ CUDA_TEST_P(CalcHist, Accuracy)
 }
 INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, CalcHist, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES));
+    ALL_DEVICES, testing::ValuesIn(hist_size_to_roi_offset_params)));
-PARAM_TEST_CASE(CalcHistWithMask, cv::cuda::DeviceInfo, cv::Size)
+PARAM_TEST_CASE(CalcHistWithMask, cv::cuda::DeviceInfo, hist_size_to_roi_offset_params_t)
 {
     cv::cuda::DeviceInfo devInfo;
     cv::Size size;
+    int roiOffsetX;
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
-        size = GET_PARAM(1);
+        size = get<0>(GET_PARAM(1));
+        roiOffsetX = get<1>(GET_PARAM(1));
         cv::cuda::setDevice(devInfo.deviceID());
     }

@@ -152,11 +185,14 @@ PARAM_TEST_CASE(CalcHistWithMask, cv::cuda::DeviceInfo, cv::Size)
 CUDA_TEST_P(CalcHistWithMask, Accuracy)
 {
     cv::Mat src = randomMat(size, CV_8UC1);
+    const Rect roi = Rect(roiOffsetX, 0, src.cols - roiOffsetX, src.rows);
     cv::Mat mask = randomMat(size, CV_8UC1);
     cv::Mat(mask, cv::Rect(0, 0, size.width / 2, size.height / 2)).setTo(0);
     cv::cuda::GpuMat hist;
-    cv::cuda::calcHist(loadMat(src), loadMat(mask), hist);
+    GpuMat srcDevice = loadMat(src);
+    GpuMat maskDevice = loadMat(mask);
+    cv::cuda::calcHist(srcDevice(roi), maskDevice(roi), hist);
     cv::Mat hist_gold;

@@ -166,7 +202,8 @@ CUDA_TEST_P(CalcHistWithMask, Accuracy)
     const float* ranges[] = {hranges};
     const int channels[] = {0};
-    cv::calcHist(&src, 1, channels, mask, hist_gold, 1, histSize, ranges);
+    const Mat srcRoi = src(roi);
+    cv::calcHist(&srcRoi, 1, channels, mask(roi), hist_gold, 1, histSize, ranges);
     hist_gold = hist_gold.reshape(1, 1);
     hist_gold.convertTo(hist_gold, CV_32S);

@@ -174,8 +211,7 @@ CUDA_TEST_P(CalcHistWithMask, Accuracy)
 }
 INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, CalcHistWithMask, testing::Combine(
-    ALL_DEVICES,
-    DIFFERENT_SIZES));
+    ALL_DEVICES, testing::ValuesIn(hist_size_to_roi_offset_params)));
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // EqualizeHist
