commit
0bcbc73bca
8 changed files with 548 additions and 5 deletions
@ -0,0 +1,61 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "perf_precomp.hpp" |
||||
|
||||
namespace opencv_test { namespace { |
||||
// Clears `dst` to zero and draws a single anti-aliased circle with value 255.
//   circle: (centre x, centre y, radius)
//   fill:   true draws a filled disc, false draws a 1-pixel outline
static void drawCircle(cv::Mat& dst, const cv::Vec3i& circle, bool fill)
{
    const Point2i centre(circle[0], circle[1]);
    const int radius = circle[2];

    // A negative thickness asks cv::circle for a filled shape.
    int thickness = 1;
    if (fill)
        thickness = -1;

    dst.setTo(Scalar::all(0));
    cv::circle(dst, centre, radius, Scalar::all(255), thickness, cv::LINE_AA);
}
||||
|
||||
DEF_PARAM_TEST(Sz_Depth, Size, MatDepth);

// Benchmarks cuda::spatialMoments (result left on the device) against cv::moments
// on an 8-bit image containing one filled circle.
PERF_TEST_P(Sz_Depth, SpatialMoments, Combine(CUDA_TYPICAL_MAT_SIZES, Values(MatDepth(CV_32F), MatDepth(CV_64F))))
{
    const cv::Size imgSize = GET_PARAM(0);
    const int outputDepth = GET_PARAM(1);

    // Circle centred in the image with a radius of 90% of half the width.
    const int halfWidth = imgSize.width / 2;
    const int radius = static_cast<int>(static_cast<float>(halfWidth) * 0.9);
    const Vec3i circle(halfWidth, imgSize.height / 2, radius);

    Mat hostImage(imgSize, CV_8U);
    drawCircle(hostImage, circle, true);

    if (!PERF_RUN_CUDA()) {
        cv::Moments cpuResult;
        TEST_CYCLE() {
            cpuResult = cv::moments(hostImage, false);
        }
        SANITY_CHECK_NOTHING();
    }
    else {
        const MomentsOrder maxOrder = MomentsOrder::THIRD_ORDER_MOMENTS;
        const int momentCount = numMoments(maxOrder);
        GpuMat deviceMoments(1, momentCount, outputDepth);
        const GpuMat deviceImage(hostImage);
        TEST_CYCLE() {
            cuda::spatialMoments(deviceImage, deviceMoments, false, maxOrder, outputDepth);
        }
        SANITY_CHECK_NOTHING();
    }
}
||||
|
||||
// Benchmarks cuda::moments (result returned to the host) against cv::moments
// on an 8-bit image containing one filled circle.
PERF_TEST_P(Sz_Depth, Moments, Combine(CUDA_TYPICAL_MAT_SIZES, Values(MatDepth(CV_32F), MatDepth(CV_64F))))
{
    const cv::Size imgSize = GET_PARAM(0);
    const int outputDepth = GET_PARAM(1);

    // Circle centred in the image with a radius of 90% of half the width.
    const int halfWidth = imgSize.width / 2;
    const int radius = static_cast<int>(static_cast<float>(halfWidth) * 0.9);
    const Vec3i circle(halfWidth, imgSize.height / 2, radius);

    Mat hostImage(imgSize, CV_8U);
    drawCircle(hostImage, circle, true);

    if (!PERF_RUN_CUDA()) {
        cv::Moments cpuResult;
        TEST_CYCLE() {
            cpuResult = cv::moments(hostImage, false);
        }
        SANITY_CHECK_NOTHING();
    }
    else {
        const MomentsOrder maxOrder = MomentsOrder::THIRD_ORDER_MOMENTS;
        const int momentCount = numMoments(maxOrder);

        // Size the buffer pool for exactly one moments vector of the requested depth.
        const size_t elemBytes = (outputDepth == CV_64F) ? sizeof(double) : sizeof(float);
        setBufferPoolUsage(true);
        setBufferPoolConfig(getDevice(), momentCount * elemBytes, 1);

        const GpuMat deviceImage(hostImage);
        cv::Moments gpuResult;
        TEST_CYCLE() {
            gpuResult = cuda::moments(deviceImage, false, maxOrder, outputDepth);
        }
        SANITY_CHECK_NOTHING();
    }
}
||||
|
||||
}} |
@ -0,0 +1,186 @@ |
||||
// This file is part of OpenCV project. |
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory |
||||
// of this distribution and at http://opencv.org/license.html. |
||||
|
||||
#if !defined CUDA_DISABLER |
||||
|
||||
#include <opencv2/core/cuda/common.hpp> |
||||
#include <opencv2/cudev/util/atomic.hpp> |
||||
#include "moments.cuh" |
||||
|
||||
namespace cv { namespace cuda { namespace device { namespace imgproc { |
||||
|
||||
// Thread-block shape used by every spatialMoments launch in this file.
// blockSizeX: threads per image row; each row of the block is reduced with a
//             full-warp shuffle, so it must equal the warp size.
// blockSizeY: image rows handled per block; the cross-row reduction uses a
//             16-lane (half-warp) shuffle, so it must equal 16.
constexpr int blockSizeX = 32;
constexpr int blockSizeY = 16;

// Fail the build instead of silently producing wrong sums if the shape is changed
// without updating the shuffle-based reductions.
static_assert(blockSizeX == 32, "butterflyWarpReduction assumes one full warp per block row");
static_assert(blockSizeY == 16, "butterflyHalfWarpReduction assumes exactly 16 rows per block");
||||
|
||||
// Sums `value` over all 32 lanes of a warp with an XOR (butterfly) shuffle pattern.
// Every lane of the warp must call this (full participation mask); every lane
// receives the total.
template <typename T>
__device__ T butterflyWarpReduction(T value) {
    int laneMask = 16;
    while (laneMask >= 1) {
        const T partner = __shfl_xor_sync(0xffffffff, value, laneMask, 32);
        value += partner;
        laneMask /= 2;
    }
    return value;
}
||||
|
||||
// Sums `value` over lanes 0..15 of a warp with an XOR (butterfly) shuffle pattern.
// Only the lower 16 lanes are named in the participation mask, so exactly those
// lanes must call this; each of them receives the total.
template <typename T>
__device__ T butterflyHalfWarpReduction(T value) {
    int laneMask = 8;
    while (laneMask >= 1) {
        const T partner = __shfl_xor_sync(0xffff, value, laneMask, 32);
        value += partner;
        laneMask /= 2;
    }
    return value;
}
||||
|
||||
// Accumulates one pixel into the per-thread partial row sums.
//   val: pixel value (already binarised by the caller if requested)
//   x:   column index of the pixel
//   r:   r[0] += val, r[1] += val*x, r[2] += val*x^2, r[3] += val*x^3
// r[2] is only updated when second-order moments are requested (nMoments >= n12)
// and r[3] only when third-order moments are requested (nMoments >= n123).
template<typename T, int nMoments>
__device__ void updateSums(const T val, const unsigned int x, T r[4]) {
    // Form the powers of x in the accumulator type: x * x in unsigned int
    // wraps around for x >= 65536.
    const T xT = static_cast<T>(x);
    const T x2 = xT * xT;
    const T x3 = xT * x2;
    r[0] += val;
    r[1] += val * xT;
    if (nMoments >= n12) r[2] += val * x2;
    if (nMoments >= n123) r[3] += val * x3;
}
||||
|
||||
// Accumulates the partial sums of image row `y` for the calling thread.
// The threads of a block row stride over the columns in steps of blockDim.x.
// With `binary` set, every non-zero pixel counts as 1. `smem` is not used here.
template<typename TSrc, typename TMoments, int nMoments>
__device__ void rowReductions(const PtrStepSz<TSrc> img, const bool binary, const unsigned int y, TMoments r[4], TMoments smem[][nMoments + 1]) {
    int col = threadIdx.x;
    while (col < img.cols) {
        TMoments pixel;
        if (binary && img(y, col) != 0)
            pixel = 1;
        else
            pixel = img(y, col);
        updateSums<TMoments, nMoments>(pixel, col, r);
        col += blockDim.x;
    }
}
||||
|
||||
// Accumulates the partial sums of image row `y` for 1-byte pixel types, reading
// four pixels per 32-bit load.
//   fourByteAligned: the row start is already 4-byte aligned (no head to peel)
//   offsetX:         x offset of the ROI inside its parent image; only its value
//                    modulo 4 matters for alignment
// The row is processed in three parts: an unaligned head (thread 0 only), the
// aligned body (all threads of the block row, one 32-bit word each per step) and
// the tail of up to 3 leftover pixels (thread 0 only).
// With `binary` set, every non-zero pixel counts as 1. `smem` is not used here.
// NOTE(review): assumes the parent image rows start on a 4-byte boundary — confirm
// for GpuMat headers wrapping user-allocated memory.
template<typename TSrc, typename TMoments, bool fourByteAligned, int nMoments>
__device__ void rowReductionsCoalesced(const PtrStepSz<TSrc> img, const bool binary, const unsigned int y, TMoments r[4], const int offsetX, TMoments smem[][nMoments + 1]) {
    // Number of leading pixels before the next 4-byte boundary. Reducing offsetX
    // modulo 4 keeps this in [0, 3]; a plain "4 - offsetX" goes negative for ROI
    // offsets above 4 and makes the body read in front of the ROI.
    const int alignedOffset = fourByteAligned ? 0 : ((4 - (offsetX & 3)) & 3);

    // load uncoalesced head
    if (!fourByteAligned && threadIdx.x == 0) {
        const int headEnd = ::min(alignedOffset, static_cast<int>(img.cols));
        for (int x = 0; x < headEnd; x++) {
            const TMoments val = (!binary || img(y, x) == 0) ? img(y, x) : 1;
            updateSums<TMoments, nMoments>(val, x, r);
        }
    }

    // coalesced loads
    const unsigned int* rowPtrIntAligned = (const unsigned int*)(img.ptr(y) + alignedOffset);
    const int cols4 = ::max((static_cast<int>(img.cols) - alignedOffset) / 4, 0);
    for (int x = threadIdx.x; x < cols4; x += blockDim.x) {
        const unsigned int data = rowPtrIntAligned[x];
#pragma unroll 4
        for (int i = 0; i < 4; i++) {
            const int iX = alignedOffset + 4 * x + i;
            // Reinterpret the extracted byte as the source type so that signed
            // 8-bit pixels keep their sign (matches the head and tail paths).
            const TSrc srcVal = static_cast<TSrc>((data >> (i * 8)) & 0xFFU);
            const TMoments val = (!binary || srcVal == 0) ? srcVal : 1;
            updateSums<TMoments, nMoments>(val, iX, r);
        }
    }

    // load uncoalesced tail
    if (threadIdx.x == 0) {
        const int iTailStart = cols4 * 4 + alignedOffset;
        for (int x = iTailStart; x < img.cols; x++) {
            const TMoments val = (!binary || img(y, x) == 0) ? img(y, x) : 1;
            updateSums<TMoments, nMoments>(val, x, r);
        }
    }
}
||||
|
||||
// Computes the raw spatial moments of `img` and atomically adds them to `moments`.
//
// Launch layout: 1D grid, block of (blockSizeX, blockSizeY) threads. Each block
// row (one warp) handles one image row: y = blockIdx.x * blockDim.y + threadIdx.y.
// Shared memory: blockSizeY x (nMoments + 1) values of TMoments, statically sized.
// `moments` must hold at least nMoments values and be zeroed before the launch,
// because results are accumulated with atomicAdd.
//
// Output order: m00, m10, m01 [, m20, m11, m02 [, m30, m21, m12, m03]].
//
//   coalesced / fourByteAligned: select the packed 32-bit row reader for 1-byte pixels
//   offsetX: ROI x offset, forwarded to the packed row reader
template <typename TSrc, typename TMoments, bool coalesced = false, bool fourByteAligned = false, int nMoments>
__global__ void spatialMoments(const PtrStepSz<TSrc> img, const bool binary, TMoments* moments, const int offsetX = 0) {
    const unsigned int y = blockIdx.x * blockDim.y + threadIdx.y;

    // smem[row][k]: k-th moment contribution of block row `row`; column nMoments
    // is scratch for the per-block totals.
    __shared__ TMoments smem[blockSizeY][nMoments + 1];
    if (threadIdx.y < nMoments && threadIdx.x < blockSizeY)
        smem[threadIdx.x][threadIdx.y] = 0;
    __syncthreads();

    // The predicate depends only on threadIdx.y, so it is uniform across a warp and
    // the full-mask shuffles below are executed by all 32 lanes or by none.
    const bool rowInImage = y < img.rows;

    TMoments r[4] = { 0 };
    if (rowInImage) {
        if (coalesced)
            rowReductionsCoalesced<TSrc, TMoments, fourByteAligned, nMoments>(img, binary, y, r, offsetX, smem);
        else
            rowReductions<TSrc, TMoments, nMoments>(img, binary, y, r, smem);
    }

    // Rows are not skipped when their pixel sum is zero: with signed or
    // floating-point input the higher-order sums can still be non-zero.
    if (rowInImage) {
        // Powers of y in the accumulator type; y * y in 32-bit wraps for y >= 65536.
        const TMoments yT = static_cast<TMoments>(y);
        const TMoments y2 = yT * yT;
        const TMoments y3 = y2 * yT;

        // Reduce in TMoments; forcing float here would truncate double accumulators.
        const TMoments sum0 = butterflyWarpReduction(r[0]);
        const TMoments sum1 = butterflyWarpReduction(r[1]);
        smem[threadIdx.y][0] = sum0;       // m00
        smem[threadIdx.y][1] = sum1;       // m10
        smem[threadIdx.y][2] = yT * sum0;  // m01
        if (nMoments >= n12) {
            const TMoments sum2 = butterflyWarpReduction(r[2]);
            smem[threadIdx.y][3] = sum2;       // m20
            smem[threadIdx.y][4] = sum1 * yT;  // m11
            smem[threadIdx.y][5] = y2 * sum0;  // m02
            if (nMoments >= n123) {
                smem[threadIdx.y][6] = butterflyWarpReduction(r[3]); // m30
                smem[threadIdx.y][7] = sum2 * yT;  // m21
                smem[threadIdx.y][8] = sum1 * y2;  // m12
                smem[threadIdx.y][9] = y3 * sum0;  // m03
            }
        }
    }
    __syncthreads();

    // Warp k (k < nMoments) sums moment k over the blockSizeY rows of the block.
    if (threadIdx.x < blockSizeY && threadIdx.y < nMoments)
        smem[threadIdx.y][nMoments] = butterflyHalfWarpReduction(smem[threadIdx.x][threadIdx.y]);
    __syncthreads();

    // One atomic per moment and block; zero totals are skipped.
    if (threadIdx.y == 0 && threadIdx.x < nMoments) {
        if (smem[threadIdx.x][nMoments])
            cudev::atomicAdd(&moments[threadIdx.x], smem[threadIdx.x][nMoments]);
    }
}
||||
|
||||
// Launches the generic (one element per load) moments kernel.
// `moments` must be zeroed by the caller; `offsetX` is unused on this path.
// When `stream` is the default stream the call blocks until the kernel has finished.
template <typename TSrc, typename TMoments, int nMoments> struct momentsDispatcherNonChar {
    static void call(const PtrStepSz<TSrc> src, PtrStepSz<TMoments> moments, const bool binary, const int offsetX, const cudaStream_t stream) {
        // An empty image contributes nothing, and a zero-sized grid is an invalid launch.
        if (src.rows <= 0 || src.cols <= 0)
            return;

        const dim3 blockSize(blockSizeX, blockSizeY);
        const dim3 gridSize(divUp(src.rows, blockSizeY));
        spatialMoments<TSrc, TMoments, false, false, nMoments><<<gridSize, blockSize, 0, stream>>>(src, binary, moments.ptr());
        // Launch-configuration errors are only reported through cudaGetLastError().
        cudaSafeCall(cudaGetLastError());

        if (stream == 0)
            cudaSafeCall(cudaStreamSynchronize(stream));
    }
};
||||
|
||||
// Launches the packed-load moments kernel for 1-byte pixel types with float output.
// A non-zero `offsetX` (ROI x offset) selects the variant that peels an unaligned
// head before the 32-bit loads; otherwise the fully aligned variant is used.
// `moments` must be zeroed by the caller.
// When `stream` is the default stream the call blocks until the kernel has finished.
template <typename TSrc, int nMoments> struct momentsDispatcherChar {
    static void call(const PtrStepSz<TSrc> src, PtrStepSz<float> moments, const bool binary, const int offsetX, const cudaStream_t stream) {
        // An empty image contributes nothing, and a zero-sized grid is an invalid launch.
        if (src.rows <= 0 || src.cols <= 0)
            return;

        const dim3 blockSize(blockSizeX, blockSizeY);
        const dim3 gridSize(divUp(src.rows, blockSizeY));
        if (offsetX)
            spatialMoments<TSrc, float, true, false, nMoments><<<gridSize, blockSize, 0, stream>>>(src, binary, moments.ptr(), offsetX);
        else
            spatialMoments<TSrc, float, true, true, nMoments><<<gridSize, blockSize, 0, stream>>>(src, binary, moments.ptr());
        // Launch-configuration errors are only reported through cudaGetLastError().
        cudaSafeCall(cudaGetLastError());

        if (stream == 0)
            cudaSafeCall(cudaStreamSynchronize(stream));
    }
};
||||
|
||||
// Selects the kernel launcher for a (source type, moments type) pair.
// Default: the generic launcher.
template <typename TSrc, typename TMoments, int nMoments>
struct momentsDispatcher
    : momentsDispatcherNonChar<TSrc, TMoments, nMoments>
{
};

// 8-bit unsigned source with float moments: packed-load launcher.
template <int nMoments>
struct momentsDispatcher<uchar, float, nMoments>
    : momentsDispatcherChar<uchar, nMoments>
{
};

// 8-bit signed source with float moments: packed-load launcher.
template <int nMoments>
struct momentsDispatcher<schar, float, nMoments>
    : momentsDispatcherChar<schar, nMoments>
{
};
||||
|
||||
// Host entry point: reinterprets the byte-typed headers as TSrc / TMoments and
// forwards to the launcher instantiated for the requested maximum order.
//   order: 1, 2 or 3; any other value does nothing
template <typename TSrc, typename TMoments>
void moments(const PtrStepSzb src, PtrStepSzb moments, const bool binary, const int order, const int offsetX, const cudaStream_t stream) {
    const PtrStepSz<TSrc> typedSrc = static_cast<PtrStepSz<TSrc>>(src);
    const PtrStepSz<TMoments> typedMoments = static_cast<PtrStepSz<TMoments>>(moments);

    switch (order) {
    case 1:
        momentsDispatcher<TSrc, TMoments, n1>::call(typedSrc, typedMoments, binary, offsetX, stream);
        break;
    case 2:
        momentsDispatcher<TSrc, TMoments, n12>::call(typedSrc, typedMoments, binary, offsetX, stream);
        break;
    case 3:
        momentsDispatcher<TSrc, TMoments, n123>::call(typedSrc, typedMoments, binary, offsetX, stream);
        break;
    default:
        break;
    }
}
||||
|
||||
// Explicit instantiations: every supported source depth combined with float and
// double moments. These are the symbols the host-side dispatch table links against.
#define CV_CUDA_MOMENTS_INSTANTIATE(TSrc, TMoments) \
    template void moments<TSrc, TMoments>(const PtrStepSzb src, PtrStepSzb moments, const bool binary, const int order, const int offsetX, const cudaStream_t stream);

#define CV_CUDA_MOMENTS_INSTANTIATE_ALL_SOURCES(TMoments) \
    CV_CUDA_MOMENTS_INSTANTIATE(uchar, TMoments)  \
    CV_CUDA_MOMENTS_INSTANTIATE(schar, TMoments)  \
    CV_CUDA_MOMENTS_INSTANTIATE(ushort, TMoments) \
    CV_CUDA_MOMENTS_INSTANTIATE(short, TMoments)  \
    CV_CUDA_MOMENTS_INSTANTIATE(int, TMoments)    \
    CV_CUDA_MOMENTS_INSTANTIATE(float, TMoments)  \
    CV_CUDA_MOMENTS_INSTANTIATE(double, TMoments)

CV_CUDA_MOMENTS_INSTANTIATE_ALL_SOURCES(float)
CV_CUDA_MOMENTS_INSTANTIATE_ALL_SOURCES(double)

#undef CV_CUDA_MOMENTS_INSTANTIATE_ALL_SOURCES
#undef CV_CUDA_MOMENTS_INSTANTIATE
||||
|
||||
}}}} |
||||
|
||||
#endif /* CUDA_DISABLER */ |
@ -0,0 +1,6 @@ |
||||
#pragma once |
||||
namespace cv { namespace cuda { namespace device { namespace imgproc {
    // Number of raw spatial moments up to a given maximum order.
    constexpr int n1 = 1 + 2;      // order <= 1: 3 values
    constexpr int n12 = n1 + 3;    // order <= 2: 6 values
    constexpr int n123 = n12 + 4;  // order <= 3: 10 values
}}}}
@ -0,0 +1,67 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "precomp.hpp" |
||||
#include "cuda/moments.cuh" |
||||
|
||||
using namespace cv; |
||||
using namespace cv::cuda; |
||||
|
||||
int cv::cuda::numMoments(const MomentsOrder order) { |
||||
return order == MomentsOrder::FIRST_ORDER_MOMENTS ? device::imgproc::n1 : order == MomentsOrder::SECOND_ORDER_MOMENTS ? device::imgproc::n12 : device::imgproc::n123; |
||||
} |
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) |
||||
Moments cv::cuda::moments(InputArray src, const bool binary, const MomentsOrder order, const int momentsType) { throw_no_cuda(); } |
||||
void spatialMoments(InputArray src, OutputArray moments, const bool binary, const MomentsOrder order, const int momentsType, Stream& stream) { throw_no_cuda(); } |
||||
#else /* !defined (HAVE_CUDA) */ |
||||
|
||||
namespace cv { namespace cuda { namespace device { namespace imgproc { |
||||
template <typename TSrc, typename TMoments> |
||||
void moments(const PtrStepSzb src, PtrStepSzb moments, const bool binary, const int order, const int offsetX, const cudaStream_t stream); |
||||
}}}} |
||||
|
||||
// Computes the raw spatial moments of a single-channel image on the GPU.
//   src:         input image, any depth up to CV_64F
//   moments:     output row vector; resized to at least numMoments(order) columns
//                of type momentsType (a wider existing output keeps its width)
//   binary:      treat every non-zero pixel as 1
//   order:       highest moment order to compute
//   momentsType: CV_32F or CV_64F
//   stream:      stream on which all device work is enqueued
void cv::cuda::spatialMoments(InputArray src, OutputArray moments, const bool binary, const MomentsOrder order, const int momentsType, Stream& stream) {
    CV_Assert(src.depth() <= CV_64F);
    // The kernels index the image as one value per pixel.
    CV_Assert(src.channels() == 1);
    CV_Assert(momentsType == CV_32F || momentsType == CV_64F);
    const GpuMat srcDevice = getInputMat(src, stream);

    const int nMoments = numMoments(order);
    const int momentsCols = nMoments < moments.cols() ? moments.cols() : nMoments;
    GpuMat momentsDevice = getOutputMat(moments, 1, momentsCols, momentsType, stream);
    // The kernel accumulates with atomicAdd, so the output must start at zero.
    // Clear it on the caller's stream to stay ordered with the kernel and asynchronous.
    momentsDevice.setTo(Scalar::all(0), stream);

    Point ofs; Size wholeSize;
    srcDevice.locateROI(wholeSize, ofs);
    // Only the ROI offset modulo 4 matters for the 32-bit packed row reader.
    const int alignmentOffsetX = ofs.x % 4;

    typedef void (*func_t)(const PtrStepSzb src, PtrStepSzb moments, const bool binary, const int order, const int offsetX, const cudaStream_t stream);
    // Rows: source depth (CV_8U .. CV_64F); columns: moments depth (CV_32F, CV_64F).
    static const func_t funcs[7][2] =
    {
        {device::imgproc::moments<uchar, float>,  device::imgproc::moments<uchar, double> },
        {device::imgproc::moments<schar, float>,  device::imgproc::moments<schar, double> },
        {device::imgproc::moments<ushort, float>, device::imgproc::moments<ushort, double>},
        {device::imgproc::moments<short, float>,  device::imgproc::moments<short, double> },
        {device::imgproc::moments<int, float>,    device::imgproc::moments<int, double>   },
        {device::imgproc::moments<float, float>,  device::imgproc::moments<float, double> },
        {device::imgproc::moments<double, float>, device::imgproc::moments<double, double>}
    };

    const func_t func = funcs[srcDevice.depth()][momentsType == CV_64F];
    func(srcDevice, momentsDevice, binary, static_cast<int>(order), alignmentOffsetX, StreamAccessor::getStream(stream));
    syncOutput(momentsDevice, moments, stream);
}
||||
|
||||
// Computes the moments of an image on the GPU and returns them as cv::Moments.
// Runs on the default stream and blocks until the result is on the host.
// Spatial moments above the requested `order` are passed to cv::Moments as zero.
Moments cv::cuda::moments(InputArray src, const bool binary, const MomentsOrder order, const int momentsType) {
    Stream& stream = Stream::Null();
    HostMem dst;
    spatialMoments(src, dst, binary, order, momentsType, stream);
    stream.waitForCompletion();
    const Mat moments = dst.createMatHeader();

    // Only numMoments(order) values were computed; reading a fixed 10 elements
    // would run past the end of the buffer for first- and second-order requests.
    const int nComputed = std::min(numMoments(order), static_cast<int>(moments.total()));
    double m[10] = { 0 };
    for (int i = 0; i < nComputed; i++)
        m[i] = (momentsType == CV_32F) ? static_cast<double>(moments.at<float>(i)) : moments.at<double>(i);

    return Moments(m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8], m[9]);
}
||||
|
||||
#endif /* !defined (HAVE_CUDA) */ |
@ -0,0 +1,124 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "test_precomp.hpp" |
||||
|
||||
#ifdef HAVE_CUDA |
||||
|
||||
namespace opencv_test { namespace { |
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Moments
|
||||
|
||||
CV_ENUM(MaxMomentsOrder, MomentsOrder::FIRST_ORDER_MOMENTS, MomentsOrder::SECOND_ORDER_MOMENTS, MomentsOrder::THIRD_ORDER_MOMENTS) |
||||
|
||||
// Fixture for the CUDA moments tests. Parameters, in order: device, image size,
// binary flag, moments depth, image depth, ROI flag, maximum moments order.
PARAM_TEST_CASE(Moments, cv::cuda::DeviceInfo, cv::Size, bool, MatDepth, MatDepth, UseRoi, MaxMomentsOrder)
{
    DeviceInfo devInfo;
    Size size;
    bool isBinary;
    float pcWidth = 0.6f;   // circle radius as a fraction of half the image width
    int momentsType;
    int imgType;
    bool useRoi;
    MomentsOrder order;

    // Unpacks the test parameters and selects the device under test.
    virtual void SetUp()
    {
        devInfo     = GET_PARAM(0);
        size        = GET_PARAM(1);
        isBinary    = GET_PARAM(2);
        momentsType = GET_PARAM(3);
        imgType     = GET_PARAM(4);
        useRoi      = GET_PARAM(5);
        const int orderValue = static_cast<int>(GET_PARAM(6));
        order = static_cast<MomentsOrder>(orderValue);

        cv::cuda::setDevice(devInfo.deviceID());
    }

    // Clears `dst` and draws one anti-aliased circle (centre x, centre y, radius)
    // with value 255, filled or as a 1-pixel outline.
    static void drawCircle(cv::Mat& dst, const cv::Vec3i& circle, bool fill)
    {
        const Point2i centre(circle[0], circle[1]);
        int thickness = 1;
        if (fill)
            thickness = -1;
        dst.setTo(Scalar::all(0));
        cv::circle(dst, centre, circle[2], Scalar::all(255), thickness, cv::LINE_AA);
    }
};
||||
|
||||
// Compares a reference value m0 with a measured value m1.
//   absPcErr == 0: exact equality is required.
//   m0 == 0:       |m1| must be below absPcErr (absolute check).
//   otherwise:     |m0 - m1| / |m1| must be below absPcErr (relative check).
bool Equal(const double m0, const double m1, const double absPcErr) {
    if (absPcErr == 0) return m0 == m1;
    // Compare magnitudes: a large negative m1 must not pass as "close to zero".
    if (m0 == 0) return std::abs(m1) < absPcErr;
    // Divide by |m1| so a negative m1 cannot turn the relative error negative,
    // which would make the check pass unconditionally.
    const double pcDiff = std::abs(m0 - m1) / std::abs(m1);
    return pcDiff < absPcErr;
}
||||
|
||||
// Asserts that the spatial moments of m0 and m1 agree up to the given order.
// Double-precision results must match exactly; single-precision results are
// allowed a relative error of 5e-7. Stops at the first mismatch.
void CheckMoments(const cv::Moments m0, const cv::Moments m1, const MomentsOrder order, const int momentsType) {
    double absPcErr = momentsType == CV_64F ? 0 : 5e-7;
    const int maxOrder = static_cast<int>(order);

// Compares one field of both structures and reports both values on failure.
#define CV_CHECK_MOMENT_FIELD(field) \
    ASSERT_TRUE(Equal(m0.field, m1.field, absPcErr)) << "m0." #field ": " << m0.field << ", m1." #field ": " << m1.field << ", absPcErr: " << absPcErr

    CV_CHECK_MOMENT_FIELD(m00);
    CV_CHECK_MOMENT_FIELD(m10);
    CV_CHECK_MOMENT_FIELD(m01);
    if (maxOrder >= static_cast<int>(MomentsOrder::SECOND_ORDER_MOMENTS)) {
        CV_CHECK_MOMENT_FIELD(m20);
        CV_CHECK_MOMENT_FIELD(m11);
        CV_CHECK_MOMENT_FIELD(m02);
    }
    if (maxOrder >= static_cast<int>(MomentsOrder::THIRD_ORDER_MOMENTS)) {
        CV_CHECK_MOMENT_FIELD(m30);
        CV_CHECK_MOMENT_FIELD(m21);
        CV_CHECK_MOMENT_FIELD(m12);
        CV_CHECK_MOMENT_FIELD(m03);
    }

#undef CV_CHECK_MOMENT_FIELD
}
||||
|
||||
// Compares cuda::moments with cv::moments on an image containing one filled circle.
CUDA_TEST_P(Moments, Accuracy)
{
    Mat hostImage(size, imgType);

    // With useRoi the first and last column are cut off, giving an x offset of 1.
    Rect roi(0, 0, hostImage.cols, hostImage.rows);
    if (useRoi)
        roi = Rect(1, 0, hostImage.cols - 2, hostImage.rows);

    const int radius = static_cast<int>(static_cast<float>(size.width/2) * pcWidth);
    drawCircle(hostImage, Vec3i(size.width / 2, size.height / 2, radius), true);
    const GpuMat deviceImage(hostImage);

    // Size the buffer pool for exactly one moments vector of the requested depth.
    const int momentCount = numMoments(order);
    const size_t elemBytes = (momentsType == CV_64F) ? sizeof(double) : sizeof(float);
    setBufferPoolUsage(true);
    setBufferPoolConfig(getDevice(), momentCount * elemBytes, 1);

    const cv::Moments gpuMoments = cuda::moments(deviceImage(roi), isBinary, order, momentsType);

    // Reference: CPU moments of the same ROI converted to 32-bit float.
    Mat hostImageFloat;
    hostImage(roi).convertTo(hostImageFloat, CV_32F);
    const cv::Moments cpuMoments = cv::moments(hostImageFloat, isBinary);

    CheckMoments(cpuMoments, gpuMoments, order, momentsType);
}
||||
|
||||
// Compares cuda::spatialMoments, run on a user stream with an asynchronous
// download, with cv::moments on an image containing one filled circle.
CUDA_TEST_P(Moments, Async)
{
    Stream stream;
    const int nMoments = numMoments(order);
    GpuMat momentsDevice(1, nMoments, momentsType);
    Mat imgHost(size, imgType);
    // With useRoi the first and last column are cut off, giving an x offset of 1.
    const Rect roi = useRoi ? Rect(1, 0, imgHost.cols - 2, imgHost.rows) : Rect(0, 0, imgHost.cols, imgHost.rows);
    const Vec3i circle(size.width / 2, size.height / 2, static_cast<int>(static_cast<float>(size.width/2) * pcWidth));
    drawCircle(imgHost, circle, true);
    const GpuMat imgDevice(imgHost);
    cuda::spatialMoments(imgDevice(roi), momentsDevice, isBinary, order, momentsType, stream);
    HostMem momentsHost(1, nMoments, momentsType);
    momentsDevice.download(momentsHost, stream);
    stream.waitForCompletion();

    Mat momentsHost64F;
    momentsHost.createMatHeader().convertTo(momentsHost64F, CV_64F);
    // Only nMoments values exist; the remaining spatial moments are left at zero
    // instead of being read from beyond the end of the matrix.
    double m[10] = { 0 };
    for (int i = 0; i < nMoments; i++)
        m[i] = momentsHost64F.at<double>(i);
    const cv::Moments moments = cv::Moments(m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8], m[9]);

    // Reference: CPU moments of the same ROI; depths other than 8U and 32F are
    // converted to 32F first.
    Mat imgHostAdjustedType = imgHost(roi);
    if (imgType != CV_8U && imgType != CV_32F)
        imgHost(roi).convertTo(imgHostAdjustedType, CV_32F);
    const cv::Moments momentsGs = cv::moments(imgHostAdjustedType, isBinary);
    CheckMoments(momentsGs, moments, order, momentsType);
}
||||
|
||||
#define SIZES DIFFERENT_SIZES |
||||
#define GRAYSCALE_BINARY testing::Bool() |
||||
#define MOMENTS_TYPE testing::Values(MatDepth(CV_32F), MatDepth(CV_64F)) |
||||
#define IMG_TYPE ALL_DEPTH |
||||
#define USE_ROI WHOLE_SUBMAT |
||||
#define MOMENTS_ORDER testing::Values(MaxMomentsOrder(MomentsOrder::FIRST_ORDER_MOMENTS), MaxMomentsOrder(MomentsOrder::SECOND_ORDER_MOMENTS), MaxMomentsOrder(MomentsOrder::THIRD_ORDER_MOMENTS)) |
||||
INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, Moments, testing::Combine(ALL_DEVICES, SIZES, GRAYSCALE_BINARY, MOMENTS_TYPE, IMG_TYPE, USE_ROI, MOMENTS_ORDER)); |
||||
}} // namespace
|
||||
|
||||
#endif // HAVE_CUDA
|
Loading…
Reference in new issue