|
|
|
@ -45,8 +45,19 @@ |
|
|
|
|
using namespace cv; |
|
|
|
|
using namespace cv::cuda; |
|
|
|
|
|
|
|
|
|
#define canUseContext NPP_VERSION >= (10 * 1000 + 1 * 100 + 0) |
|
|
|
|
#if canUseContext |
|
|
|
|
#define CTX_PREFIX _Ctx |
|
|
|
|
#else |
|
|
|
|
#define CTX_PREFIX |
|
|
|
|
#endif |
|
|
|
|
#define PPCAT_NX(A, B) A ## B |
|
|
|
|
#define PPCAT(A, B) PPCAT_NX(A, B) |
|
|
|
|
#define TRY_CTX(func) PPCAT(func, CTX_PREFIX) |
|
|
|
|
|
|
|
|
|
#if !defined HAVE_CUDA || defined(CUDA_DISABLER) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void cv::cuda::warpAffine(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); } |
|
|
|
|
void cv::cuda::buildWarpAffineMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); } |
|
|
|
|
|
|
|
|
@ -135,7 +146,17 @@ void cv::cuda::buildWarpPerspectiveMaps(InputArray _M, bool inverse, Size dsize, |
|
|
|
|
|
|
|
|
|
namespace |
|
|
|
|
{ |
|
|
|
|
template <int DEPTH> struct NppWarpFunc |
|
|
|
|
template <int DEPTH, bool CanUseContext=true> struct NppWarpFunc |
|
|
|
|
{ |
|
|
|
|
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type; |
|
|
|
|
|
|
|
|
|
typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst, |
|
|
|
|
int dstStep, NppiRect dstRoi, const double coeffs[][3], |
|
|
|
|
int interpolation, NppStreamContext stream_ctx); |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
template <int DEPTH> |
|
|
|
|
struct NppWarpFunc<DEPTH, false> |
|
|
|
|
{ |
|
|
|
|
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type; |
|
|
|
|
|
|
|
|
@ -150,11 +171,24 @@ namespace |
|
|
|
|
#endif |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
template <int DEPTH, typename NppWarpFunc<DEPTH>::func_t func> struct NppWarp |
|
|
|
|
template <int DEPTH, typename NppWarpFunc<DEPTH, canUseContext>::func_t func> struct NppWarp |
|
|
|
|
{ |
|
|
|
|
typedef typename NppWarpFunc<DEPTH>::npp_type npp_type; |
|
|
|
|
|
|
|
|
|
typedef typename NppWarpFunc<DEPTH, canUseContext>::npp_type npp_type; |
|
|
|
|
|
|
|
|
|
static void call(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream) |
|
|
|
|
{ |
|
|
|
|
call_impl(src, dst, coeffs, interpolation, stream, Int2Type<canUseContext>()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
template <int I> |
|
|
|
|
struct Int2Type |
|
|
|
|
{ |
|
|
|
|
enum { value = I }; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
// with context
|
|
|
|
|
static void call_impl(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream, Int2Type<true>) |
|
|
|
|
{ |
|
|
|
|
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC}; |
|
|
|
|
|
|
|
|
@ -174,7 +208,40 @@ namespace |
|
|
|
|
dstroi.height = dst.rows; |
|
|
|
|
dstroi.width = dst.cols; |
|
|
|
|
|
|
|
|
|
cv::cuda::NppStreamHandler h(stream); |
|
|
|
|
NppStatus nppStatus = NPP_SUCCESS; |
|
|
|
|
NppStreamContext nppStreamContext{}; |
|
|
|
|
nppStatus = nppGetStreamContext(&nppStreamContext); |
|
|
|
|
CV_Assert(NPP_SUCCESS == nppStatus); |
|
|
|
|
nppStreamContext.hStream = stream; |
|
|
|
|
|
|
|
|
|
nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi, |
|
|
|
|
dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi, |
|
|
|
|
coeffs, npp_inter[interpolation], nppStreamContext) ); |
|
|
|
|
|
|
|
|
|
if (stream == 0) |
|
|
|
|
cudaSafeCall( cudaDeviceSynchronize() ); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// without context
|
|
|
|
|
static void call_impl(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream, Int2Type<false>) |
|
|
|
|
{ |
|
|
|
|
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC}; |
|
|
|
|
|
|
|
|
|
NppiSize srcsz; |
|
|
|
|
srcsz.height = src.rows; |
|
|
|
|
srcsz.width = src.cols; |
|
|
|
|
|
|
|
|
|
NppiRect srcroi; |
|
|
|
|
srcroi.x = 0; |
|
|
|
|
srcroi.y = 0; |
|
|
|
|
srcroi.height = src.rows; |
|
|
|
|
srcroi.width = src.cols; |
|
|
|
|
|
|
|
|
|
NppiRect dstroi; |
|
|
|
|
dstroi.x = 0; |
|
|
|
|
dstroi.y = 0; |
|
|
|
|
dstroi.height = dst.rows; |
|
|
|
|
dstroi.width = dst.cols; |
|
|
|
|
|
|
|
|
|
#if USE_NPP_STREAM_CTX |
|
|
|
|
nppSafeCall(func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi, |
|
|
|
@ -205,7 +272,8 @@ void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size |
|
|
|
|
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC ); |
|
|
|
|
CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP ); |
|
|
|
|
|
|
|
|
|
_dst.create(dsize, src.type()); |
|
|
|
|
if (_dst.size() != dsize) |
|
|
|
|
_dst.create(dsize, src.type()); |
|
|
|
|
GpuMat dst = _dst.getGpuMat(); |
|
|
|
|
|
|
|
|
|
Size wholeSize; |
|
|
|
@ -281,20 +349,20 @@ void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
{ |
|
|
|
|
{NppWarp<CV_8U, nppiWarpAffine_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffine_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffine_8u_C4R>::call}, |
|
|
|
|
{NppWarp<CV_8U, TRY_CTX(nppiWarpAffine_8u_C1R)>::call, 0, NppWarp<CV_8U, TRY_CTX(nppiWarpAffine_8u_C3R)>::call, NppWarp<CV_8U, TRY_CTX(nppiWarpAffine_8u_C4R)>::call}, |
|
|
|
|
{0, 0, 0, 0}, |
|
|
|
|
{NppWarp<CV_16U, nppiWarpAffine_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffine_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffine_16u_C4R>::call}, |
|
|
|
|
{NppWarp<CV_16U, TRY_CTX(nppiWarpAffine_16u_C1R)>::call, 0, NppWarp<CV_16U, TRY_CTX(nppiWarpAffine_16u_C3R)>::call, NppWarp<CV_16U, TRY_CTX(nppiWarpAffine_16u_C4R)>::call}, |
|
|
|
|
{0, 0, 0, 0}, |
|
|
|
|
{NppWarp<CV_32S, nppiWarpAffine_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffine_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffine_32s_C4R>::call}, |
|
|
|
|
{NppWarp<CV_32F, nppiWarpAffine_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffine_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffine_32f_C4R>::call} |
|
|
|
|
{NppWarp<CV_32S, TRY_CTX(nppiWarpAffine_32s_C1R)>::call, 0, NppWarp<CV_32S, TRY_CTX(nppiWarpAffine_32s_C3R)>::call, NppWarp<CV_32S, TRY_CTX(nppiWarpAffine_32s_C4R)>::call}, |
|
|
|
|
{NppWarp<CV_32F, TRY_CTX(nppiWarpAffine_32f_C1R)>::call, 0, NppWarp<CV_32F, TRY_CTX(nppiWarpAffine_32f_C3R)>::call, NppWarp<CV_32F, TRY_CTX(nppiWarpAffine_32f_C4R)>::call} |
|
|
|
|
}, |
|
|
|
|
{ |
|
|
|
|
{NppWarp<CV_8U, nppiWarpAffineBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffineBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffineBack_8u_C4R>::call}, |
|
|
|
|
{NppWarp<CV_8U, TRY_CTX(nppiWarpAffineBack_8u_C1R)>::call, 0, NppWarp<CV_8U, TRY_CTX(nppiWarpAffineBack_8u_C3R)>::call, NppWarp<CV_8U, TRY_CTX(nppiWarpAffineBack_8u_C4R)>::call}, |
|
|
|
|
{0, 0, 0, 0}, |
|
|
|
|
{NppWarp<CV_16U, nppiWarpAffineBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffineBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffineBack_16u_C4R>::call}, |
|
|
|
|
{NppWarp<CV_16U, TRY_CTX(nppiWarpAffineBack_16u_C1R)>::call, 0, NppWarp<CV_16U, TRY_CTX(nppiWarpAffineBack_16u_C3R)>::call, NppWarp<CV_16U, TRY_CTX(nppiWarpAffineBack_16u_C4R)>::call}, |
|
|
|
|
{0, 0, 0, 0}, |
|
|
|
|
{NppWarp<CV_32S, nppiWarpAffineBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffineBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffineBack_32s_C4R>::call}, |
|
|
|
|
{NppWarp<CV_32F, nppiWarpAffineBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffineBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffineBack_32f_C4R>::call} |
|
|
|
|
{NppWarp<CV_32S, TRY_CTX(nppiWarpAffineBack_32s_C1R)>::call, 0, NppWarp<CV_32S, TRY_CTX(nppiWarpAffineBack_32s_C3R)>::call, NppWarp<CV_32S, TRY_CTX(nppiWarpAffineBack_32s_C4R)>::call}, |
|
|
|
|
{NppWarp<CV_32F, TRY_CTX(nppiWarpAffineBack_32f_C1R)>::call, 0, NppWarp<CV_32F, TRY_CTX(nppiWarpAffineBack_32f_C3R)>::call, NppWarp<CV_32F, TRY_CTX(nppiWarpAffineBack_32f_C4R)>::call} |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
}; |
|
|
|
@ -439,20 +507,20 @@ void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M, |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
{ |
|
|
|
|
{NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call}, |
|
|
|
|
{NppWarp<CV_8U, TRY_CTX(nppiWarpPerspective_8u_C1R)>::call, 0, NppWarp<CV_8U, TRY_CTX(nppiWarpPerspective_8u_C3R)>::call, NppWarp<CV_8U, TRY_CTX(nppiWarpPerspective_8u_C4R)>::call}, |
|
|
|
|
{0, 0, 0, 0}, |
|
|
|
|
{NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call}, |
|
|
|
|
{NppWarp<CV_16U, TRY_CTX(nppiWarpPerspective_16u_C1R)>::call, 0, NppWarp<CV_16U, TRY_CTX(nppiWarpPerspective_16u_C3R)>::call, NppWarp<CV_16U, TRY_CTX(nppiWarpPerspective_16u_C4R)>::call}, |
|
|
|
|
{0, 0, 0, 0}, |
|
|
|
|
{NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call}, |
|
|
|
|
{NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call} |
|
|
|
|
{NppWarp<CV_32S, TRY_CTX(nppiWarpPerspective_32s_C1R)>::call, 0, NppWarp<CV_32S, TRY_CTX(nppiWarpPerspective_32s_C3R)>::call, NppWarp<CV_32S, TRY_CTX(nppiWarpPerspective_32s_C4R)>::call}, |
|
|
|
|
{NppWarp<CV_32F, TRY_CTX(nppiWarpPerspective_32f_C1R)>::call, 0, NppWarp<CV_32F, TRY_CTX(nppiWarpPerspective_32f_C3R)>::call, NppWarp<CV_32F, TRY_CTX(nppiWarpPerspective_32f_C4R)>::call} |
|
|
|
|
}, |
|
|
|
|
{ |
|
|
|
|
{NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call}, |
|
|
|
|
{NppWarp<CV_8U, TRY_CTX(nppiWarpPerspectiveBack_8u_C1R)>::call, 0, NppWarp<CV_8U, TRY_CTX(nppiWarpPerspectiveBack_8u_C3R)>::call, NppWarp<CV_8U, TRY_CTX(nppiWarpPerspectiveBack_8u_C4R)>::call}, |
|
|
|
|
{0, 0, 0, 0}, |
|
|
|
|
{NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call}, |
|
|
|
|
{NppWarp<CV_16U, TRY_CTX(nppiWarpPerspectiveBack_16u_C1R)>::call, 0, NppWarp<CV_16U, TRY_CTX(nppiWarpPerspectiveBack_16u_C3R)>::call, NppWarp<CV_16U, TRY_CTX(nppiWarpPerspectiveBack_16u_C4R)>::call}, |
|
|
|
|
{0, 0, 0, 0}, |
|
|
|
|
{NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call}, |
|
|
|
|
{NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call} |
|
|
|
|
{NppWarp<CV_32S, TRY_CTX(nppiWarpPerspectiveBack_32s_C1R)>::call, 0, NppWarp<CV_32S, TRY_CTX(nppiWarpPerspectiveBack_32s_C3R)>::call, NppWarp<CV_32S, TRY_CTX(nppiWarpPerspectiveBack_32s_C4R)>::call}, |
|
|
|
|
{NppWarp<CV_32F, TRY_CTX(nppiWarpPerspectiveBack_32f_C1R)>::call, 0, NppWarp<CV_32F, TRY_CTX(nppiWarpPerspectiveBack_32f_C3R)>::call, NppWarp<CV_32F, TRY_CTX(nppiWarpPerspectiveBack_32f_C4R)>::call} |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
}; |
|
|
|
|