pull/3338/merge
Mikhail Scherbina 1 month ago committed by GitHub
commit f5d489087c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 110
      modules/cudawarping/src/warp.cpp

@ -45,8 +45,19 @@
using namespace cv;
using namespace cv::cuda;
#define canUseContext NPP_VERSION >= (10 * 1000 + 1 * 100 + 0)
#if canUseContext
#define CTX_PREFIX _Ctx
#else
#define CTX_PREFIX
#endif
#define PPCAT_NX(A, B) A ## B
#define PPCAT(A, B) PPCAT_NX(A, B)
#define TRY_CTX(func) PPCAT(func, CTX_PREFIX)
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
void cv::cuda::warpAffine(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
void cv::cuda::buildWarpAffineMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
@ -135,7 +146,17 @@ void cv::cuda::buildWarpPerspectiveMaps(InputArray _M, bool inverse, Size dsize,
namespace
{
template <int DEPTH> struct NppWarpFunc
template <int DEPTH, bool CanUseContext=true> struct NppWarpFunc
{
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst,
int dstStep, NppiRect dstRoi, const double coeffs[][3],
int interpolation, NppStreamContext stream_ctx);
};
template <int DEPTH>
struct NppWarpFunc<DEPTH, false>
{
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
@ -150,11 +171,24 @@ namespace
#endif
};
template <int DEPTH, typename NppWarpFunc<DEPTH>::func_t func> struct NppWarp
template <int DEPTH, typename NppWarpFunc<DEPTH, canUseContext>::func_t func> struct NppWarp
{
typedef typename NppWarpFunc<DEPTH>::npp_type npp_type;
typedef typename NppWarpFunc<DEPTH, canUseContext>::npp_type npp_type;
static void call(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream)
{
call_impl(src, dst, coeffs, interpolation, stream, Int2Type<canUseContext>());
}
template <int I>
struct Int2Type
{
enum { value = I };
};
// with context
static void call_impl(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream, Int2Type<true>)
{
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
@ -174,7 +208,40 @@ namespace
dstroi.height = dst.rows;
dstroi.width = dst.cols;
cv::cuda::NppStreamHandler h(stream);
NppStatus nppStatus = NPP_SUCCESS;
NppStreamContext nppStreamContext{};
nppStatus = nppGetStreamContext(&nppStreamContext);
CV_Assert(NPP_SUCCESS == nppStatus);
nppStreamContext.hStream = stream;
nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi,
coeffs, npp_inter[interpolation], nppStreamContext) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
// without context
static void call_impl(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream, Int2Type<false>)
{
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
NppiSize srcsz;
srcsz.height = src.rows;
srcsz.width = src.cols;
NppiRect srcroi;
srcroi.x = 0;
srcroi.y = 0;
srcroi.height = src.rows;
srcroi.width = src.cols;
NppiRect dstroi;
dstroi.x = 0;
dstroi.y = 0;
dstroi.height = dst.rows;
dstroi.width = dst.cols;
#if USE_NPP_STREAM_CTX
nppSafeCall(func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
@ -205,7 +272,8 @@ void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
_dst.create(dsize, src.type());
if (_dst.size() != dsize)
_dst.create(dsize, src.type());
GpuMat dst = _dst.getGpuMat();
Size wholeSize;
@ -281,20 +349,20 @@ void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size
}
#else
{
{NppWarp<CV_8U, nppiWarpAffine_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffine_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffine_8u_C4R>::call},
{NppWarp<CV_8U, TRY_CTX(nppiWarpAffine_8u_C1R)>::call, 0, NppWarp<CV_8U, TRY_CTX(nppiWarpAffine_8u_C3R)>::call, NppWarp<CV_8U, TRY_CTX(nppiWarpAffine_8u_C4R)>::call},
{0, 0, 0, 0},
{NppWarp<CV_16U, nppiWarpAffine_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffine_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffine_16u_C4R>::call},
{NppWarp<CV_16U, TRY_CTX(nppiWarpAffine_16u_C1R)>::call, 0, NppWarp<CV_16U, TRY_CTX(nppiWarpAffine_16u_C3R)>::call, NppWarp<CV_16U, TRY_CTX(nppiWarpAffine_16u_C4R)>::call},
{0, 0, 0, 0},
{NppWarp<CV_32S, nppiWarpAffine_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffine_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffine_32s_C4R>::call},
{NppWarp<CV_32F, nppiWarpAffine_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffine_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffine_32f_C4R>::call}
{NppWarp<CV_32S, TRY_CTX(nppiWarpAffine_32s_C1R)>::call, 0, NppWarp<CV_32S, TRY_CTX(nppiWarpAffine_32s_C3R)>::call, NppWarp<CV_32S, TRY_CTX(nppiWarpAffine_32s_C4R)>::call},
{NppWarp<CV_32F, TRY_CTX(nppiWarpAffine_32f_C1R)>::call, 0, NppWarp<CV_32F, TRY_CTX(nppiWarpAffine_32f_C3R)>::call, NppWarp<CV_32F, TRY_CTX(nppiWarpAffine_32f_C4R)>::call}
},
{
{NppWarp<CV_8U, nppiWarpAffineBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffineBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffineBack_8u_C4R>::call},
{NppWarp<CV_8U, TRY_CTX(nppiWarpAffineBack_8u_C1R)>::call, 0, NppWarp<CV_8U, TRY_CTX(nppiWarpAffineBack_8u_C3R)>::call, NppWarp<CV_8U, TRY_CTX(nppiWarpAffineBack_8u_C4R)>::call},
{0, 0, 0, 0},
{NppWarp<CV_16U, nppiWarpAffineBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffineBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffineBack_16u_C4R>::call},
{NppWarp<CV_16U, TRY_CTX(nppiWarpAffineBack_16u_C1R)>::call, 0, NppWarp<CV_16U, TRY_CTX(nppiWarpAffineBack_16u_C3R)>::call, NppWarp<CV_16U, TRY_CTX(nppiWarpAffineBack_16u_C4R)>::call},
{0, 0, 0, 0},
{NppWarp<CV_32S, nppiWarpAffineBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffineBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffineBack_32s_C4R>::call},
{NppWarp<CV_32F, nppiWarpAffineBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffineBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffineBack_32f_C4R>::call}
{NppWarp<CV_32S, TRY_CTX(nppiWarpAffineBack_32s_C1R)>::call, 0, NppWarp<CV_32S, TRY_CTX(nppiWarpAffineBack_32s_C3R)>::call, NppWarp<CV_32S, TRY_CTX(nppiWarpAffineBack_32s_C4R)>::call},
{NppWarp<CV_32F, TRY_CTX(nppiWarpAffineBack_32f_C1R)>::call, 0, NppWarp<CV_32F, TRY_CTX(nppiWarpAffineBack_32f_C3R)>::call, NppWarp<CV_32F, TRY_CTX(nppiWarpAffineBack_32f_C4R)>::call}
}
#endif
};
@ -439,20 +507,20 @@ void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M,
}
#else
{
{NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call},
{NppWarp<CV_8U, TRY_CTX(nppiWarpPerspective_8u_C1R)>::call, 0, NppWarp<CV_8U, TRY_CTX(nppiWarpPerspective_8u_C3R)>::call, NppWarp<CV_8U, TRY_CTX(nppiWarpPerspective_8u_C4R)>::call},
{0, 0, 0, 0},
{NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call},
{NppWarp<CV_16U, TRY_CTX(nppiWarpPerspective_16u_C1R)>::call, 0, NppWarp<CV_16U, TRY_CTX(nppiWarpPerspective_16u_C3R)>::call, NppWarp<CV_16U, TRY_CTX(nppiWarpPerspective_16u_C4R)>::call},
{0, 0, 0, 0},
{NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call},
{NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call}
{NppWarp<CV_32S, TRY_CTX(nppiWarpPerspective_32s_C1R)>::call, 0, NppWarp<CV_32S, TRY_CTX(nppiWarpPerspective_32s_C3R)>::call, NppWarp<CV_32S, TRY_CTX(nppiWarpPerspective_32s_C4R)>::call},
{NppWarp<CV_32F, TRY_CTX(nppiWarpPerspective_32f_C1R)>::call, 0, NppWarp<CV_32F, TRY_CTX(nppiWarpPerspective_32f_C3R)>::call, NppWarp<CV_32F, TRY_CTX(nppiWarpPerspective_32f_C4R)>::call}
},
{
{NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call},
{NppWarp<CV_8U, TRY_CTX(nppiWarpPerspectiveBack_8u_C1R)>::call, 0, NppWarp<CV_8U, TRY_CTX(nppiWarpPerspectiveBack_8u_C3R)>::call, NppWarp<CV_8U, TRY_CTX(nppiWarpPerspectiveBack_8u_C4R)>::call},
{0, 0, 0, 0},
{NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call},
{NppWarp<CV_16U, TRY_CTX(nppiWarpPerspectiveBack_16u_C1R)>::call, 0, NppWarp<CV_16U, TRY_CTX(nppiWarpPerspectiveBack_16u_C3R)>::call, NppWarp<CV_16U, TRY_CTX(nppiWarpPerspectiveBack_16u_C4R)>::call},
{0, 0, 0, 0},
{NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call},
{NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call}
{NppWarp<CV_32S, TRY_CTX(nppiWarpPerspectiveBack_32s_C1R)>::call, 0, NppWarp<CV_32S, TRY_CTX(nppiWarpPerspectiveBack_32s_C3R)>::call, NppWarp<CV_32S, TRY_CTX(nppiWarpPerspectiveBack_32s_C4R)>::call},
{NppWarp<CV_32F, TRY_CTX(nppiWarpPerspectiveBack_32f_C1R)>::call, 0, NppWarp<CV_32F, TRY_CTX(nppiWarpPerspectiveBack_32f_C3R)>::call, NppWarp<CV_32F, TRY_CTX(nppiWarpPerspectiveBack_32f_C4R)>::call}
}
#endif
};

Loading…
Cancel
Save