commit
0bcbc73bca
8 changed files with 548 additions and 5 deletions
@ -0,0 +1,61 @@ |
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#include "perf_precomp.hpp" |
||||||
|
|
||||||
|
namespace opencv_test { namespace { |
||||||
|
// Blanks dst and renders a single anti-aliased white circle into it.
// circle = (centre x, centre y, radius); fill selects a filled disc instead of a 1-pixel outline.
static void drawCircle(cv::Mat& dst, const cv::Vec3i& circle, bool fill)
{
    const Point2i center(circle[0], circle[1]);
    const int radius = circle[2];
    const int thickness = fill ? -1 : 1;

    dst.setTo(Scalar::all(0));
    cv::circle(dst, center, radius, Scalar::all(255), thickness, cv::LINE_AA);
}
||||||
|
|
||||||
|
DEF_PARAM_TEST(Sz_Depth, Size, MatDepth);

// Times cuda::spatialMoments (result is written to a GpuMat) on an 8-bit image of a filled
// circle; when CUDA is not selected the same image is timed with cv::moments instead.
PERF_TEST_P(Sz_Depth, SpatialMoments, Combine(CUDA_TYPICAL_MAT_SIZES, Values(MatDepth(CV_32F), MatDepth(CV_64F))))
{
    const cv::Size size = GET_PARAM(0);
    const int momentsType = GET_PARAM(1);

    // Circle centred in the frame, radius = 90% of half the image width.
    const int centerX = size.width / 2;
    const int centerY = size.height / 2;
    const int radius = static_cast<int>(static_cast<float>(centerX) * 0.9);
    Mat imgHost(size, CV_8U);
    drawCircle(imgHost, Vec3i(centerX, centerY, radius), true);

    if (PERF_RUN_CUDA())
    {
        const MomentsOrder order = MomentsOrder::THIRD_ORDER_MOMENTS;
        GpuMat momentsDevice(1, numMoments(order), momentsType);
        const GpuMat imgDevice(imgHost);

        TEST_CYCLE() cuda::spatialMoments(imgDevice, momentsDevice, false, order, momentsType);
        SANITY_CHECK_NOTHING();
    }
    else
    {
        cv::Moments momentsHost;

        TEST_CYCLE() momentsHost = cv::moments(imgHost, false);
        SANITY_CHECK_NOTHING();
    }
}
||||||
|
|
||||||
|
// Times cuda::moments (result returned to the host as cv::Moments) on an 8-bit image of a
// filled circle; when CUDA is not selected the same image is timed with cv::moments instead.
PERF_TEST_P(Sz_Depth, Moments, Combine(CUDA_TYPICAL_MAT_SIZES, Values(MatDepth(CV_32F), MatDepth(CV_64F))))
{
    const cv::Size size = GET_PARAM(0);
    const int momentsType = GET_PARAM(1);

    // Circle centred in the frame, radius = 90% of half the image width.
    const int centerX = size.width / 2;
    const int centerY = size.height / 2;
    const int radius = static_cast<int>(static_cast<float>(centerX) * 0.9);
    Mat imgHost(size, CV_8U);
    drawCircle(imgHost, Vec3i(centerX, centerY, radius), true);

    if (PERF_RUN_CUDA())
    {
        const MomentsOrder order = MomentsOrder::THIRD_ORDER_MOMENTS;
        const int nMoments = numMoments(order);

        // Enable the buffer pool with one stack sized to hold exactly the moments vector.
        const size_t elemBytes = (momentsType == CV_64F) ? sizeof(double) : sizeof(float);
        setBufferPoolUsage(true);
        setBufferPoolConfig(getDevice(), nMoments * elemBytes, 1);

        const GpuMat imgDevice(imgHost);
        cv::Moments momentsHost;

        TEST_CYCLE() momentsHost = cuda::moments(imgDevice, false, order, momentsType);
        SANITY_CHECK_NOTHING();
    }
    else
    {
        cv::Moments momentsHost;

        TEST_CYCLE() momentsHost = cv::moments(imgHost, false);
        SANITY_CHECK_NOTHING();
    }
}
||||||
|
|
||||||
|
}} |
@ -0,0 +1,186 @@ |
|||||||
|
// This file is part of OpenCV project. |
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory |
||||||
|
// of this distribution and at http://opencv.org/license.html. |
||||||
|
|
||||||
|
#if !defined CUDA_DISABLER |
||||||
|
|
||||||
|
#include <opencv2/core/cuda/common.hpp> |
||||||
|
#include <opencv2/cudev/util/atomic.hpp> |
||||||
|
#include "moments.cuh" |
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device { namespace imgproc { |
||||||
|
|
||||||
|
// Thread-block shape used by every launch in this file: blockSizeX threads along x
// (32, matching the full-warp shuffle reductions below) and blockSizeY rows along y.
constexpr int blockSizeX = 32;
constexpr int blockSizeY = 16;
||||||
|
|
||||||
|
// Sums `value` across all 32 lanes of the calling warp with an XOR-butterfly exchange
// (lane masks 16, 8, 4, 2, 1). Every lane returns the total.
// All 32 lanes must call this together: the shuffle uses the full participant mask.
template <typename T>
__device__ T butterflyWarpReduction(T value) {
    for (int laneMask = 16; laneMask > 0; laneMask >>= 1) {
        const T partner = __shfl_xor_sync(0xffffffff, value, laneMask, 32);
        value += partner;
    }
    return value;
}
||||||
|
|
||||||
|
// Sums `value` across lanes 0..15 of the calling warp with an XOR-butterfly exchange
// (lane masks 8, 4, 2, 1). Each of those 16 lanes returns the total.
// The participant mask 0xffff names lanes 0..15 only, so exactly those lanes must call this.
template <typename T>
__device__ T butterflyHalfWarpReduction(T value) {
    for (int laneMask = 8; laneMask > 0; laneMask >>= 1) {
        const T partner = __shfl_xor_sync(0xffff, value, laneMask, 32);
        value += partner;
    }
    return value;
}
||||||
|
|
||||||
|
// Adds the contribution of one pixel to the per-thread row accumulators:
//   r[0] += val, r[1] += val*x, r[2] += val*x^2 (second order and up), r[3] += val*x^3 (third order).
// val      - pixel value (already mapped to 0/1 in binary mode by the caller)
// x        - column of the pixel
// r        - the four running sums of the calling thread
// nMoments - n1 / n12 / n123; selects at compile time which powers are accumulated
template<typename T, int nMoments>
__device__ void updateSums(const T val, const unsigned int x, T r[4]) {
    // Form the powers in the accumulator type: the previous `x * x` was evaluated in 32-bit
    // unsigned arithmetic and silently wrapped for columns >= 65536. For smaller columns the
    // result is bit-identical (both operands are exactly representable, one rounding).
    const T xT = static_cast<T>(x);
    const T x2 = xT * xT;
    const T x3 = xT * x2;
    r[0] += val;
    r[1] += val * xT;
    if (nMoments >= n12) r[2] += val * x2;
    if (nMoments >= n123) r[3] += val * x3;
}
||||||
|
|
||||||
|
// Generic (element-by-element) accumulation of row y into the calling thread's sums r.
// The threads of a block row stride across the columns in steps of blockDim.x.
// In binary mode every non-zero pixel is counted as 1. `smem` is not used here.
template<typename TSrc, typename TMoments, int nMoments>
__device__ void rowReductions(const PtrStepSz<TSrc> img, const bool binary, const unsigned int y, TMoments r[4], TMoments smem[][nMoments + 1]) {
    for (int col = threadIdx.x; col < img.cols; col += blockDim.x) {
        const bool countAsOne = binary && !(img(y, col) == 0);
        const TMoments val = countAsOne ? 1 : img(y, col);
        updateSums<TMoments, nMoments>(val, col, r);
    }
}
||||||
|
|
||||||
|
// Accumulation of row y for 8-bit sources using 32-bit (4 pixels at a time) loads.
//   head : pixels in front of the first 4-byte boundary, read one by one by thread x == 0
//   body : whole 32-bit words, strided across the threads of the block row
//   tail : pixels after the last whole word, read one by one by thread x == 0
// offsetX is the x offset of the ROI inside its parent image (ignored when fourByteAligned).
// In binary mode every non-zero pixel is counted as 1. `smem` is not used here.
// NOTE(review): assumes each row of the parent allocation starts on a 4-byte boundary, so that
// only offsetX decides the alignment of img.ptr(y) - confirm for user-wrapped memory.
template<typename TSrc, typename TMoments, bool fourByteAligned, int nMoments>
__device__ void rowReductionsCoalesced(const PtrStepSz<TSrc> img, const bool binary, const unsigned int y, TMoments r[4], const int offsetX, TMoments smem[][nMoments + 1]) {
    // Only the position of the ROI start inside a 32-bit word matters. The previous
    // `4 - offsetX` became negative for offsets above 4, which moved the word pointer in front
    // of the ROI and produced wrong x coordinates.
    const int misalignment = fourByteAligned ? 0 : (offsetX & 3);
    const int alignedOffset = misalignment ? 4 - misalignment : 0;

    // load uncoalesced head
    if (!fourByteAligned && threadIdx.x == 0) {
        const int headEnd = ::min(alignedOffset, static_cast<int>(img.cols));
        for (int x = 0; x < headEnd; x++) {
            const TMoments val = (!binary || img(y, x) == 0) ? img(y, x) : 1;
            updateSums<TMoments, nMoments>(val, x, r);
        }
    }

    // coalesced loads
    const unsigned int* rowPtrIntAligned = (const unsigned int*)(img.ptr(y) + alignedOffset);
    const int cols4 = (img.cols - alignedOffset) / 4;
    for (int x = threadIdx.x; x < cols4; x += blockDim.x) {
        const unsigned int data = rowPtrIntAligned[x];
#pragma unroll 4
        for (int i = 0; i < 4; i++) {
            const int iX = alignedOffset + 4 * x + i;
            // Reinterpret the extracted byte as the source type so that signed 8-bit pixels keep
            // their sign, exactly as the head and tail loops see them through img(y, x).
            const TSrc pixel = static_cast<TSrc>(static_cast<unsigned char>((data >> (i * 8)) & 0xFFU));
            const TMoments val = (!binary || pixel == 0) ? pixel : 1;
            updateSums<TMoments, nMoments>(val, iX, r);
        }
    }

    // load uncoalesced tail
    if (threadIdx.x == 0) {
        const int iTailStart = cols4 * 4 + alignedOffset;
        for (int x = iTailStart; x < img.cols; x++) {
            const TMoments val = (!binary || img(y, x) == 0) ? img(y, x) : 1;
            updateSums<TMoments, nMoments>(val, x, r);
        }
    }
}
||||||
|
|
||||||
|
// Kernel: adds the raw spatial moments of `img` to moments[0 .. nMoments) with atomic adds.
//
// Launch layout (see the dispatchers below): blockDim = (blockSizeX, blockSizeY) and a 1-D grid
// of divUp(rows, blockSizeY) blocks. Each block row (one warp, blockSizeX == 32) reduces one
// image row; the per-row results are then summed over the block and added to `moments`.
// `moments` must be zeroed by the caller before the launch.
// Output order: m00, m10, m01, m20, m11, m02, m30, m21, m12, m03 (truncated to nMoments).
//
// coalesced / fourByteAligned select the 32-bit load path for 8-bit sources; offsetX is the ROI
// x offset forwarded to that path.
template <typename TSrc, typename TMoments, bool coalesced = false, bool fourByteAligned = false, int nMoments>
__global__ void spatialMoments(const PtrStepSz<TSrc> img, const bool binary, TMoments* moments, const int offsetX = 0) {
    const unsigned int y = blockIdx.x * blockDim.y + threadIdx.y;
    // One row of partial moments per block row; column nMoments holds the block totals.
    __shared__ TMoments smem[blockSizeY][nMoments + 1];
    if (threadIdx.y < nMoments && threadIdx.x < blockSizeY)
        smem[threadIdx.x][threadIdx.y] = 0;
    __syncthreads();

    // Per-thread sums of val, val*x, val*x^2, val*x^3 over this thread's share of row y.
    TMoments r[4] = { 0 };
    if (y < img.rows) {
        if (coalesced)
            rowReductionsCoalesced<TSrc, TMoments, fourByteAligned, nMoments>(img, binary, y, r, offsetX, smem);
        else
            rowReductions<TSrc, TMoments, nMoments>(img, binary, y, r, smem);
    }

    // Powers of y in the accumulator type (the former 32-bit `y * y` wrapped for y >= 65536).
    const TMoments yT = static_cast<TMoments>(y);
    const TMoments y2 = yT * yT;
    const TMoments y3 = y2 * yT;

    // Warp-wide row sums. They are reduced in TMoments (previously the zeroth sum was forced
    // through float, truncating double accumulators) and unconditionally: a row whose pixel sum
    // is zero can still have non-zero higher moments when the source holds negative values, so
    // it must not be skipped. The nMoments tests are compile-time constants, so every lane of
    // the warp executes the same shuffles.
    const TMoments sumX0 = butterflyWarpReduction(r[0]);
    const TMoments sumX1 = butterflyWarpReduction(r[1]);
    const TMoments sumX2 = (nMoments >= n12) ? butterflyWarpReduction(r[2]) : TMoments(0);
    const TMoments sumX3 = (nMoments >= n123) ? butterflyWarpReduction(r[3]) : TMoments(0);

    // Every lane holds the same totals; a single lane publishes them.
    if (threadIdx.x == 0) {
        TMoments* const rowMoments = smem[threadIdx.y];
        rowMoments[0] = sumX0;          //0th
        rowMoments[1] = sumX1;          //1st
        rowMoments[2] = yT * sumX0;     //1st
        if (nMoments >= n12) {
            rowMoments[3] = sumX2;      //2nd
            rowMoments[4] = sumX1 * yT; //2nd
            rowMoments[5] = y2 * sumX0; //2nd
        }
        if (nMoments >= n123) {
            rowMoments[6] = sumX3;      //3rd
            rowMoments[7] = sumX2 * yT; //3rd
            rowMoments[8] = sumX1 * y2; //3rd
            rowMoments[9] = y3 * sumX0; //3rd
        }
    }
    __syncthreads();

    // Transposed read: block row m sums moment m over the blockSizeY image rows of this block.
    if (threadIdx.x < blockSizeY && threadIdx.y < nMoments)
        smem[threadIdx.y][nMoments] = butterflyHalfWarpReduction(smem[threadIdx.x][threadIdx.y]);
    __syncthreads();

    // One atomic per moment and block; zero totals are skipped to save the atomic.
    if (threadIdx.y == 0 && threadIdx.x < nMoments) {
        if (smem[threadIdx.x][nMoments])
            cudev::atomicAdd(&moments[threadIdx.x], smem[threadIdx.x][nMoments]);
    }
}
||||||
|
|
||||||
|
// Launches the generic (element-by-element) kernel for any source type.
// One block covers blockSizeY image rows. offsetX is accepted for signature parity with
// momentsDispatcherChar and is not used. When called on the default stream the function
// waits for the kernel to finish.
template <typename TSrc, typename TMoments, int nMoments> struct momentsDispatcherNonChar {
    static void call(const PtrStepSz<TSrc> src, PtrStepSz<TMoments> moments, const bool binary, const int offsetX, const cudaStream_t stream) {
        (void)offsetX;
        // Nothing to accumulate; also avoids launching a zero-sized grid.
        if (src.rows <= 0 || src.cols <= 0)
            return;

        const dim3 blockSize(blockSizeX, blockSizeY);
        const dim3 gridSize(divUp(src.rows, blockSizeY));
        spatialMoments<TSrc, TMoments, false, false, nMoments><<<gridSize, blockSize, 0, stream>>>(src, binary, moments.ptr());
        // Launch-configuration errors are only reported through cudaGetLastError().
        cudaSafeCall(cudaGetLastError());
        if (stream == 0)
            cudaSafeCall(cudaStreamSynchronize(stream));
    }
};
||||||
|
|
||||||
|
// Launches the 32-bit-load kernel for 8-bit sources with float moments.
// The fully aligned variant is used when the ROI starts on a 4-byte boundary, otherwise the
// variant with an uncoalesced head. One block covers blockSizeY image rows. When called on the
// default stream the function waits for the kernel to finish.
// NOTE(review): alignment is derived from offsetX only, i.e. each row of the parent allocation
// is assumed to start on a 4-byte boundary - confirm for user-wrapped memory.
template <typename TSrc, int nMoments> struct momentsDispatcherChar {
    static void call(const PtrStepSz<TSrc> src, PtrStepSz<float> moments, const bool binary, const int offsetX, const cudaStream_t stream) {
        // Nothing to accumulate; also avoids launching a zero-sized grid.
        if (src.rows <= 0 || src.cols <= 0)
            return;

        const dim3 blockSize(blockSizeX, blockSizeY);
        const dim3 gridSize(divUp(src.rows, blockSizeY));
        // Only the offset within a 32-bit word matters: an ROI starting at x = 4, 8, ... is
        // aligned, and x = 5 is misaligned by 1, not by 5. Passing the raw offset made the kernel
        // compute a negative head length for offsets above 4.
        const int misalignment = offsetX % 4;
        if (misalignment)
            spatialMoments<TSrc, float, true, false, nMoments><<<gridSize, blockSize, 0, stream>>>(src, binary, moments.ptr(), misalignment);
        else
            spatialMoments<TSrc, float, true, true, nMoments><<<gridSize, blockSize, 0, stream>>>(src, binary, moments.ptr());

        // Launch-configuration errors are only reported through cudaGetLastError().
        cudaSafeCall(cudaGetLastError());
        if (stream == 0)
            cudaSafeCall(cudaStreamSynchronize(stream));
    }
};
||||||
|
|
||||||
|
// Dispatcher selection: the generic launcher by default ...
template <typename TSrc, typename TMoments, int nMoments>
struct momentsDispatcher
    : momentsDispatcherNonChar<TSrc, TMoments, nMoments> {};

// ... and the 32-bit-load launcher for 8-bit sources accumulated in float.
template <int nMoments>
struct momentsDispatcher<uchar, float, nMoments>
    : momentsDispatcherChar<uchar, nMoments> {};

template <int nMoments>
struct momentsDispatcher<schar, float, nMoments>
    : momentsDispatcherChar<schar, nMoments> {};
||||||
|
|
||||||
|
// Host entry point: reinterprets the byte-typed views as TSrc / TMoments and launches the
// kernel instantiation for the requested maximum order (1, 2 or 3).
// Any other value of `order` launches nothing.
template <typename TSrc, typename TMoments>
void moments(const PtrStepSzb src, PtrStepSzb moments, const bool binary, const int order, const int offsetX, const cudaStream_t stream) {
    const PtrStepSz<TSrc> typedSrc = static_cast<PtrStepSz<TSrc>>(src);
    const PtrStepSz<TMoments> typedMoments = static_cast<PtrStepSz<TMoments>>(moments);

    switch (order) {
    case 1:
        momentsDispatcher<TSrc, TMoments, n1>::call(typedSrc, typedMoments, binary, offsetX, stream);
        break;
    case 2:
        momentsDispatcher<TSrc, TMoments, n12>::call(typedSrc, typedMoments, binary, offsetX, stream);
        break;
    case 3:
        momentsDispatcher<TSrc, TMoments, n123>::call(typedSrc, typedMoments, binary, offsetX, stream);
        break;
    default:
        break;
    }
}
||||||
|
|
||||||
|
// Explicit instantiations for every supported source depth, with float and double accumulators.
#define CV_CUDA_INSTANTIATE_MOMENTS(TSrc, TMoments) \
    template void moments<TSrc, TMoments>(const PtrStepSzb src, PtrStepSzb moments, const bool binary, const int order, const int offsetX, const cudaStream_t stream);

CV_CUDA_INSTANTIATE_MOMENTS(uchar, float)
CV_CUDA_INSTANTIATE_MOMENTS(schar, float)
CV_CUDA_INSTANTIATE_MOMENTS(ushort, float)
CV_CUDA_INSTANTIATE_MOMENTS(short, float)
CV_CUDA_INSTANTIATE_MOMENTS(int, float)
CV_CUDA_INSTANTIATE_MOMENTS(float, float)
CV_CUDA_INSTANTIATE_MOMENTS(double, float)

CV_CUDA_INSTANTIATE_MOMENTS(uchar, double)
CV_CUDA_INSTANTIATE_MOMENTS(schar, double)
CV_CUDA_INSTANTIATE_MOMENTS(ushort, double)
CV_CUDA_INSTANTIATE_MOMENTS(short, double)
CV_CUDA_INSTANTIATE_MOMENTS(int, double)
CV_CUDA_INSTANTIATE_MOMENTS(float, double)
CV_CUDA_INSTANTIATE_MOMENTS(double, double)

#undef CV_CUDA_INSTANTIATE_MOMENTS
||||||
|
|
||||||
|
}}}} |
||||||
|
|
||||||
|
#endif /* CUDA_DISABLER */ |
@ -0,0 +1,6 @@ |
|||||||
|
#pragma once |
||||||
|
namespace cv { namespace cuda { namespace device { namespace imgproc {
    // Number of raw spatial moments when computing up to first, second and third order.
    constexpr int n1 = 3;          // orders 0 and 1
    constexpr int n12 = n1 + 3;    // plus the three second-order moments  (= 6)
    constexpr int n123 = n12 + 4;  // plus the four third-order moments    (= 10)
}}}}
@ -0,0 +1,67 @@ |
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#include "precomp.hpp" |
||||||
|
#include "cuda/moments.cuh" |
||||||
|
|
||||||
|
using namespace cv; |
||||||
|
using namespace cv::cuda; |
||||||
|
|
||||||
|
int cv::cuda::numMoments(const MomentsOrder order) { |
||||||
|
return order == MomentsOrder::FIRST_ORDER_MOMENTS ? device::imgproc::n1 : order == MomentsOrder::SECOND_ORDER_MOMENTS ? device::imgproc::n12 : device::imgproc::n123; |
||||||
|
} |
||||||
|
|
||||||
|
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) |
||||||
|
Moments cv::cuda::moments(InputArray src, const bool binary, const MomentsOrder order, const int momentsType) { throw_no_cuda(); } |
||||||
|
void spatialMoments(InputArray src, OutputArray moments, const bool binary, const MomentsOrder order, const int momentsType, Stream& stream) { throw_no_cuda(); } |
||||||
|
#else /* !defined (HAVE_CUDA) */ |
||||||
|
|
||||||
|
namespace cv { namespace cuda { namespace device { namespace imgproc { |
||||||
|
template <typename TSrc, typename TMoments> |
||||||
|
void moments(const PtrStepSzb src, PtrStepSzb moments, const bool binary, const int order, const int offsetX, const cudaStream_t stream); |
||||||
|
}}}} |
||||||
|
|
||||||
|
// Computes the raw spatial moments of `src` up to `order` on the GPU.
//   src         - single-channel image of any depth up to CV_64F
//   moments     - output row vector of type momentsType with at least numMoments(order) columns;
//                 a wider existing output keeps its width
//   binary      - treat every non-zero pixel as 1
//   momentsType - CV_32F or CV_64F, the type the moments are accumulated and returned in
//   stream      - stream all work is enqueued on
void cv::cuda::spatialMoments(InputArray src, OutputArray moments, const bool binary, const MomentsOrder order, const int momentsType, Stream& stream) {
    CV_Assert(src.depth() <= CV_64F);
    // The kernels index the image as one element per pixel.
    CV_Assert(src.channels() == 1);
    const GpuMat srcDevice = getInputMat(src, stream);

    CV_Assert(momentsType == CV_32F || momentsType == CV_64F);
    const int nMoments = numMoments(order);
    const int momentsCols = nMoments < moments.cols() ? moments.cols() : nMoments;
    GpuMat momentsDevice = getOutputMat(moments, 1, momentsCols, momentsType, stream);
    // The kernel accumulates with atomic adds, so the output must start at zero. Clear it on
    // the caller's stream so the reset is ordered with the kernel without touching the default
    // stream.
    momentsDevice.setTo(Scalar::all(0), stream);

    // The x offset of a ROI inside its parent decides the alignment of the 8-bit fast path.
    Point ofs; Size wholeSize;
    srcDevice.locateROI(wholeSize, ofs);

    // Launcher table indexed by [source depth][moments are CV_64F].
    typedef void (*func_t)(const PtrStepSzb src, PtrStepSzb moments, const bool binary, const int order, const int offsetX, const cudaStream_t stream);
    static const func_t funcs[7][2] =
    {
        {device::imgproc::moments<uchar, float>,  device::imgproc::moments<uchar, double> },
        {device::imgproc::moments<schar, float>,  device::imgproc::moments<schar, double> },
        {device::imgproc::moments<ushort, float>, device::imgproc::moments<ushort, double>},
        {device::imgproc::moments<short, float>,  device::imgproc::moments<short, double> },
        {device::imgproc::moments<int, float>,    device::imgproc::moments<int, double>   },
        {device::imgproc::moments<float, float>,  device::imgproc::moments<float, double> },
        {device::imgproc::moments<double, float>, device::imgproc::moments<double, double>}
    };

    const func_t func = funcs[srcDevice.depth()][momentsType == CV_64F];
    func(srcDevice, momentsDevice, binary, static_cast<int>(order), ofs.x, StreamAccessor::getStream(stream));
    syncOutput(momentsDevice, moments, stream);
}
||||||
|
|
||||||
|
// Blocking convenience wrapper: computes the spatial moments on the default stream and returns
// them as cv::Moments. Moments above the requested order are returned as zero.
Moments cv::cuda::moments(InputArray src, const bool binary, const MomentsOrder order, const int momentsType) {
    Stream& stream = Stream::Null();
    HostMem dst;
    spatialMoments(src, dst, binary, order, momentsType, stream);
    stream.waitForCompletion();
    const Mat moments = dst.createMatHeader();

    // The result holds only numMoments(order) values (3 or 6 for first / second order).
    // Reading all ten unconditionally ran past the end of the buffer, so copy what exists
    // into a zero-padded array first.
    double m[device::imgproc::n123] = { 0 };
    const int nValid = numMoments(order);
    for (int i = 0; i < nValid; i++)
        m[i] = (momentsType == CV_32F) ? static_cast<double>(moments.at<float>(i)) : moments.at<double>(i);

    return Moments(m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8], m[9]);
}
||||||
|
|
||||||
|
#endif /* !defined (HAVE_CUDA) */ |
@ -0,0 +1,124 @@ |
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
|
#include "test_precomp.hpp" |
||||||
|
|
||||||
|
#ifdef HAVE_CUDA |
||||||
|
|
||||||
|
namespace opencv_test { namespace { |
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Moments
|
||||||
|
|
||||||
|
CV_ENUM(MaxMomentsOrder, MomentsOrder::FIRST_ORDER_MOMENTS, MomentsOrder::SECOND_ORDER_MOMENTS, MomentsOrder::THIRD_ORDER_MOMENTS)

// Fixture parameters, in order: device, image size, binary mode, moments type, image depth,
// ROI usage and maximum moments order.
PARAM_TEST_CASE(Moments, cv::cuda::DeviceInfo, cv::Size, bool, MatDepth, MatDepth, UseRoi, MaxMomentsOrder)
{
    DeviceInfo devInfo;
    Size size;
    bool isBinary;
    float pcWidth = 0.6f;  // circle radius as a fraction of half the image width
    int momentsType;
    int imgType;
    bool useRoi;
    MomentsOrder order;

    virtual void SetUp()
    {
        devInfo = GET_PARAM(0);
        cv::cuda::setDevice(devInfo.deviceID());

        size = GET_PARAM(1);
        isBinary = GET_PARAM(2);
        momentsType = GET_PARAM(3);
        imgType = GET_PARAM(4);
        useRoi = GET_PARAM(5);

        const int orderValue = static_cast<int>(GET_PARAM(6));
        order = static_cast<MomentsOrder>(orderValue);
    }

    // Blanks dst and renders one anti-aliased white circle (centre x, centre y, radius) into it.
    static void drawCircle(cv::Mat& dst, const cv::Vec3i& circle, bool fill)
    {
        const Point2i center(circle[0], circle[1]);
        const int radius = circle[2];
        const int thickness = fill ? -1 : 1;

        dst.setTo(Scalar::all(0));
        cv::circle(dst, center, radius, Scalar::all(255), thickness, cv::LINE_AA);
    }
};
||||||
|
|
||||||
|
// Compares two moment values.
//   absPcErr == 0 : exact equality is required.
//   m0 == 0       : m1 must be within absPcErr of zero (absolute test, no division possible).
//   otherwise     : the difference relative to |m1| must be below absPcErr.
// Magnitudes are used throughout; without them a negative m1 made the ratio negative and the
// comparison passed regardless of how far apart the values were.
bool Equal(const double m0, const double m1, const double absPcErr) {
    if (absPcErr == 0) return m0 == m1;
    if (m0 == 0) return std::fabs(m1) < absPcErr;
    const double pcDiff = std::fabs(m0 - m1) / std::fabs(m1);
    return pcDiff < absPcErr;
}
||||||
|
|
||||||
|
// Asserts that every raw spatial moment up to `order` agrees between m0 and m1.
// Double-precision results must match exactly; single-precision results may differ by a
// relative error of 5e-7. The first mismatch aborts the check.
void CheckMoments(const cv::Moments m0, const cv::Moments m1, const MomentsOrder order, const int momentsType) {
    const double absPcErr = momentsType == CV_64F ? 0 : 5e-7;
    const int maxOrder = static_cast<int>(order);

#define CHECK_MOMENT_FIELD(field) \
    ASSERT_TRUE(Equal(m0.field, m1.field, absPcErr)) << "m0." #field ": " << m0.field << ", m1." #field ": " << m1.field << ", absPcErr: " << absPcErr

    CHECK_MOMENT_FIELD(m00);
    CHECK_MOMENT_FIELD(m10);
    CHECK_MOMENT_FIELD(m01);

    if (maxOrder >= static_cast<int>(MomentsOrder::SECOND_ORDER_MOMENTS)) {
        CHECK_MOMENT_FIELD(m20);
        CHECK_MOMENT_FIELD(m11);
        CHECK_MOMENT_FIELD(m02);
    }

    if (maxOrder >= static_cast<int>(MomentsOrder::THIRD_ORDER_MOMENTS)) {
        CHECK_MOMENT_FIELD(m30);
        CHECK_MOMENT_FIELD(m21);
        CHECK_MOMENT_FIELD(m12);
        CHECK_MOMENT_FIELD(m03);
    }

#undef CHECK_MOMENT_FIELD
}
||||||
|
|
||||||
|
// Compares the blocking cuda::moments against cv::moments on a filled circle, optionally
// through a ROI that starts one column into the image.
CUDA_TEST_P(Moments, Accuracy)
{
    Mat imgHost(size, imgType);

    Rect roi(0, 0, imgHost.cols, imgHost.rows);
    if (useRoi)
        roi = Rect(1, 0, imgHost.cols - 2, imgHost.rows);

    // Circle centred in the frame, radius = pcWidth of half the image width.
    const int centerX = size.width / 2;
    const int centerY = size.height / 2;
    const int radius = static_cast<int>(static_cast<float>(centerX) * pcWidth);
    drawCircle(imgHost, Vec3i(centerX, centerY, radius), true);
    const GpuMat imgDevice(imgHost);

    // Enable the buffer pool with one stack sized to hold exactly the moments vector.
    const int nMoments = numMoments(order);
    const size_t elemBytes = (momentsType == CV_64F) ? sizeof(double) : sizeof(float);
    setBufferPoolUsage(true);
    setBufferPoolConfig(getDevice(), nMoments * elemBytes, 1);

    const cv::Moments moments = cuda::moments(imgDevice(roi), isBinary, order, momentsType);

    // Reference result from the CPU implementation on a float copy of the same ROI.
    Mat imgHostFloat;
    imgHost(roi).convertTo(imgHostFloat, CV_32F);
    const cv::Moments momentsGs = cv::moments(imgHostFloat, isBinary);

    CheckMoments(momentsGs, moments, order, momentsType);
}
||||||
|
|
||||||
|
// Runs cuda::spatialMoments on a user stream, downloads the result asynchronously and compares
// it against cv::moments, optionally through a ROI that starts one column into the image.
CUDA_TEST_P(Moments, Async)
{
    Stream stream;
    const int nMoments = numMoments(order);
    GpuMat momentsDevice(1, nMoments, momentsType);
    Mat imgHost(size, imgType);
    const Rect roi = useRoi ? Rect(1, 0, imgHost.cols - 2, imgHost.rows) : Rect(0, 0, imgHost.cols, imgHost.rows);
    const Vec3i circle(size.width / 2, size.height / 2, static_cast<int>(static_cast<float>(size.width/2) * pcWidth));
    drawCircle(imgHost, circle, true);
    const GpuMat imgDevice(imgHost);
    cuda::spatialMoments(imgDevice(roi), momentsDevice, isBinary, order, momentsType, stream);
    HostMem momentsHost(1, nMoments, momentsType);
    momentsDevice.download(momentsHost, stream);
    stream.waitForCompletion();

    // Bring the result to double precision regardless of the accumulation type.
    Mat momentsHost64F;
    momentsHost.createMatHeader().convertTo(momentsHost64F, CV_64F);

    // Only nMoments values exist (3 or 6 for first / second order). Reading ten unconditionally
    // ran past the end of the matrix, so pad the missing higher-order moments with zeros.
    double m[10] = { 0 };
    for (int i = 0; i < nMoments; i++)
        m[i] = momentsHost64F.at<double>(i);
    const cv::Moments moments = cv::Moments(m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8], m[9]);

    // Depths other than CV_8U and CV_32F are converted to float before the CPU reference is computed.
    Mat imgHostAdjustedType = imgHost(roi);
    if (imgType != CV_8U && imgType != CV_32F)
        imgHost(roi).convertTo(imgHostAdjustedType, CV_32F);
    const cv::Moments momentsGs = cv::moments(imgHostAdjustedType, isBinary);
    CheckMoments(momentsGs, moments, order, momentsType);
}
||||||
|
|
||||||
|
#define SIZES DIFFERENT_SIZES |
||||||
|
#define GRAYSCALE_BINARY testing::Bool() |
||||||
|
#define MOMENTS_TYPE testing::Values(MatDepth(CV_32F), MatDepth(CV_64F)) |
||||||
|
#define IMG_TYPE ALL_DEPTH |
||||||
|
#define USE_ROI WHOLE_SUBMAT |
||||||
|
#define MOMENTS_ORDER testing::Values(MaxMomentsOrder(MomentsOrder::FIRST_ORDER_MOMENTS), MaxMomentsOrder(MomentsOrder::SECOND_ORDER_MOMENTS), MaxMomentsOrder(MomentsOrder::THIRD_ORDER_MOMENTS)) |
||||||
|
INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, Moments, testing::Combine(ALL_DEVICES, SIZES, GRAYSCALE_BINARY, MOMENTS_TYPE, IMG_TYPE, USE_ROI, MOMENTS_ORDER)); |
||||||
|
}} // namespace
|
||||||
|
|
||||||
|
#endif // HAVE_CUDA
|
Loading…
Reference in new issue