Merge pull request #3600 from chacha21:remap_relative

First proposal of cv::remap with relative displacement field.

Relates to [#24621](https://github.com/opencv/opencv/pull/24621), [#24603](https://github.com/opencv/opencv/issues/24603).

CUDA implementation of the feature.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [X] I agree to contribute to the project under Apache 2 License.
- [X] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [X] The PR is proposed to the proper branch
- [X] There is a reference to the original bug report and related work
- [X] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
10 months ago · 2413f86419
parent 667a66ee0e
commit 2413f86419
4 changed files with 149 additions and 31 deletions
--- a/modules/cudawarping/include/opencv2/cudawarping.hpp
+++ b/modules/cudawarping/include/opencv2/cudawarping.hpp
@ -70,6 +70,8 @@ namespace cv { namespace cuda {
@param ymap Y values. Only CV_32FC1 type is supported.
@param interpolation Interpolation method (see resize ). INTER_NEAREST , INTER_LINEAR and
 INTER_CUBIC are supported for now.
+The extra flag WARP_RELATIVE_MAP can be ORed with the interpolation method
+(e.g. INTER_LINEAR | WARP_RELATIVE_MAP) to treat xmap and ymap as displacements relative to each destination pixel.
@param borderMode Pixel extrapolation method (see borderInterpolate ). BORDER_REFLECT101 ,
 BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now.
@param borderValue Value used in case of a constant border. By default, it is 0.
@ -79,6 +81,10 @@ The function transforms the source image using the specified map:

 \f[\texttt{dst} (x,y) =  \texttt{src} (xmap(x,y), ymap(x,y))\f]

+With the WARP_RELATIVE_MAP flag, the maps hold displacements relative to the destination pixel:
+
+\f[\texttt{dst} (x,y) =  \texttt{src} (x+xmap(x,y), y+ymap(x,y))\f]
+
 Values of pixels with non-integer coordinates are computed using the bilinear interpolation.

@sa remap
--- a/modules/cudawarping/src/cuda/remap.cu
+++ b/modules/cudawarping/src/cuda/remap.cu
@ -68,9 +68,23 @@ namespace cv { namespace cuda { namespace device
            }
        }

+        template <typename Ptr2D, typename T> __global__ void remap_relative(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
+        { // Relative remap kernel: dst(x, y) = src(x + mapx(x, y), y + mapy(x, y)); indexes threads in 2D, one thread per dst pixel.
+            const int x = blockDim.x * blockIdx.x + threadIdx.x; // destination column handled by this thread
+            const int y = blockDim.y * blockIdx.y + threadIdx.y; // destination row handled by this thread
+
+            if (x < dst.cols && y < dst.rows) // threads that fall outside dst do nothing
+            {
+                const float xcoo = x+mapx.ptr(y)[x]; // the map holds a displacement; adding the pixel's own x gives the source x
+                const float ycoo = y+mapy.ptr(y)[x]; // same for y: own row index plus the stored displacement
+
+                dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo)); // src is called with (y, x) argument order; the sample is saturate-cast to T
+            }
+        }
+
        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
        {
-            static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
+            static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool, bool isRelative)
            {
                typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

@ -81,14 +95,17 @@ namespace cv { namespace cuda { namespace device
                BorderReader<PtrStep<T>, B<work_type>> brdSrc(src, brd);
                Filter<BorderReader<PtrStep<T>, B<work_type>>> filter_src(brdSrc);

-                remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
+                if (isRelative)
+                  remap_relative<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
+                else
+                  remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
                cudaSafeCall( cudaGetLastError() );
            }
        };

        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
        {
-            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, bool)
+            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, bool, bool isRelative)
            {
                CV_UNUSED(srcWhole);
                CV_UNUSED(xoff);
@ -102,7 +119,10 @@ namespace cv { namespace cuda { namespace device
                BorderReader<PtrStep<T>, B<work_type>> brdSrc(src, brd);
                Filter<BorderReader<PtrStep<T>, B<work_type>>> filter_src(brdSrc);

-                remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
+                if (isRelative)
+                  remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
+                else
+                  remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
                cudaSafeCall( cudaGetLastError() );

                cudaSafeCall( cudaDeviceSynchronize() );
@ -112,7 +132,7 @@ namespace cv { namespace cuda { namespace device
        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStreamTex
        {
            static void call(PtrStepSz< T > src, PtrStepSz< T > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
-                PtrStepSz< T > dst, const float* borderValue, bool cc20)
+                PtrStepSz< T > dst, const float* borderValue, bool cc20, bool isRelative)
            {
                typedef typename TypeVec<float, VecTraits< T >::cn>::vec_type work_type;
                dim3 block(32, cc20 ? 8 : 4);
@ -123,7 +143,10 @@ namespace cv { namespace cuda { namespace device
                    B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                    BorderReader<cudev::TexturePtr<T>, B<work_type>> brdSrc(texSrcWhole, brd);
                    Filter<BorderReader<cudev::TexturePtr<T>, B<work_type>>> filter_src(brdSrc);
-                    remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
+                    if (isRelative)
+                        remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
+                    else
+                        remap<<<grid, block>>>(filter_src, mapx, mapy, dst);

                }
                else {
@ -131,7 +154,10 @@ namespace cv { namespace cuda { namespace device
                    B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
                    BorderReader<cudev::TextureOffPtr<T>, B<work_type>> brdSrc(texSrcWhole, brd);
                    Filter<BorderReader<cudev::TextureOffPtr<T>, B<work_type>>> filter_src(brdSrc);
-                    remap<<<grid, block >>>(filter_src, mapx, mapy, dst);
+                    if (isRelative)
+                        remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
+                    else
+                        remap<<<grid, block >>>(filter_src, mapx, mapy, dst);
                }

                cudaSafeCall( cudaGetLastError() );
@ -142,7 +168,7 @@ namespace cv { namespace cuda { namespace device
        template <template <typename> class Filter, typename T> struct RemapDispatcherNonStreamTex<Filter, BrdReplicate, T>
        {
            static void call(PtrStepSz< T > src, PtrStepSz< T > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
-                PtrStepSz< T > dst, const float*, bool)
+                PtrStepSz< T > dst, const float*, bool, bool isRelative)
            {
                dim3 block(32, 8);
                dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
@ -150,7 +176,10 @@ namespace cv { namespace cuda { namespace device
                {
                    cudev::Texture<T> texSrcWhole(srcWhole);
                    Filter<cudev::TexturePtr<T>> filter_src(texSrcWhole);
-                    remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
+                    if (isRelative)
+                        remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
+                    else
+                        remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
                }
                else
                {
@ -158,7 +187,10 @@ namespace cv { namespace cuda { namespace device
                    BrdReplicate<T> brd(src.rows, src.cols);
                    BorderReader<cudev::TextureOffPtr<T>, BrdReplicate<T>> brdSrc(texSrcWhole, brd);
                    Filter<BorderReader<cudev::TextureOffPtr<T>, BrdReplicate<T>>> filter_src(brdSrc);
-                    remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
+                    if (isRelative)
+                        remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
+                    else
+                        remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
                }
                cudaSafeCall( cudaGetLastError() );
                cudaSafeCall( cudaDeviceSynchronize() );
@ -203,20 +235,20 @@ namespace cv { namespace cuda { namespace device
        template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
        {
            static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
-                PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
+                PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative)
            {
                if (stream == 0)
-                    RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20);
+                    RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20, isRelative);
                else
-                    RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20);
+                    RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20, isRelative);
            }
        };

        template <typename T> void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
-            PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
+            PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative)
        {
            typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
-                PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);
+                PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);

            static const caller_t callers[3][5] =
            {
@ -244,24 +276,24 @@ namespace cv { namespace cuda { namespace device
            };

            callers[interpolation][borderMode](static_cast<PtrStepSz<T>>(src), static_cast<PtrStepSz<T>>(srcWhole), xoff, yoff, xmap, ymap,
-                static_cast<PtrStepSz<T>>(dst), borderValue, stream, cc20);
+                static_cast<PtrStepSz<T>>(dst), borderValue, stream, cc20, isRelative);
        }

-        template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
-        template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
-        template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
+        template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
+        template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
+        template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);

-        template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
-        template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
-        template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
+        template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
+        template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
+        template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);

-        template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
-        template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
-        template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
+        template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
+        template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
+        template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);

-        template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
-        template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
-        template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
+        template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
+        template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
+        template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
    } // namespace imgproc
 }}} // namespace cv { namespace cuda { namespace cudev

--- a/modules/cudawarping/src/remap.cpp
+++ b/modules/cudawarping/src/remap.cpp
@ -54,7 +54,7 @@ namespace cv { namespace cuda { namespace device
    {
        template <typename T>
        void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst,
-                       int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
+                       int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
    }
 }}}

@ -62,8 +62,11 @@ void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputA
 {
    using namespace cv::cuda::device::imgproc;

+    const bool hasRelativeFlag = ((interpolation & WARP_RELATIVE_MAP) != 0);
+    interpolation &= ~WARP_RELATIVE_MAP;
+
    typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation,
-        int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
+        int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
    static const func_t funcs[6][4] =
    {
        {remap_gpu<uchar>      , 0 /*remap_gpu<uchar2>*/ , remap_gpu<uchar3>     , remap_gpu<uchar4>     },
@ -98,7 +101,7 @@ void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputA
    src.locateROI(wholeSize, ofs);

    func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
-        dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
+        dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20), hasRelativeFlag);
 }

 #endif // HAVE_CUDA
--- a/modules/cudawarping/test/test_remap.cpp
+++ b/modules/cudawarping/test/test_remap.cpp
@ -204,5 +204,82 @@ INSTANTIATE_TEST_CASE_P(CUDA_Warping, RemapOutOfScope, testing::Combine(
        testing::Values(BorderType(cv::BORDER_CONSTANT)),
        WHOLE_SUBMAT));

+PARAM_TEST_CASE(RemapRelative, cv::cuda::DeviceInfo, MatType, Interpolation, BorderType) // fixture: one source image plus matching relative and absolute maps
+{
+    cv::cuda::DeviceInfo devInfo;
+    int type; // source matrix type, from test parameter 1
+    int interpolation; // interpolation flag, from test parameter 2
+    int borderType; // border mode, from test parameter 3
+
+    cv::cuda::GpuMat gSrc; // source image on the device
+    cv::cuda::GpuMat gMapRelativeX32F; // constant x displacement map
+    cv::cuda::GpuMat gMapRelativeY32F; // constant y displacement map
+    cv::cuda::GpuMat gMapAbsoluteX32F; // x displacement + column index
+    cv::cuda::GpuMat gMapAbsoluteY32F; // y displacement + row index
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        type = GET_PARAM(1);
+        interpolation = GET_PARAM(2);
+        borderType = GET_PARAM(3);
+
+        cv::cuda::setDevice(devInfo.deviceID());
+
+        const int nChannels = CV_MAT_CN(type);
+        const cv::Size size(127, 61); // width 127, height 61
+        cv::Mat data64FC1(1, size.area()*nChannels, CV_64FC1); // single row with one double per channel element of the image
+        data64FC1.forEach<double>([&](double& pixel, const int* position) {pixel = static_cast<double>(position[1]);}); // each element is set to its own column index, i.e. a ramp
+
+        cv::Mat src;
+        data64FC1.reshape(nChannels, size.height).convertTo(src, type); // fold the ramp into size.height rows of nChannels channels, then convert to the tested type
+
+        cv::Mat mapRelativeX32F(size, CV_32FC1);
+        mapRelativeX32F.setTo(cv::Scalar::all(-0.33)); // same fractional x displacement for every pixel
+
+        cv::Mat mapRelativeY32F(size, CV_32FC1);
+        mapRelativeY32F.setTo(cv::Scalar::all(-0.33)); // same fractional y displacement for every pixel
+
+        cv::Mat mapAbsoluteX32F = mapRelativeX32F.clone();
+        mapAbsoluteX32F.forEach<float>([&](float& pixel, const int* position) {
+            pixel += static_cast<float>(position[1]); // absolute x = displacement + column index
+        });
+
+        cv::Mat mapAbsoluteY32F = mapRelativeY32F.clone();
+        mapAbsoluteY32F.forEach<float>([&](float& pixel, const int* position) {
+            pixel += static_cast<float>(position[0]); // absolute y = displacement + row index
+        });
+
+        gSrc.upload(src);
+        gMapRelativeX32F.upload(mapRelativeX32F);
+        gMapRelativeY32F.upload(mapRelativeY32F);
+        gMapAbsoluteX32F.upload(mapAbsoluteX32F);
+        gMapAbsoluteY32F.upload(mapAbsoluteY32F);
+    }
+};
+CUDA_TEST_P(RemapRelative, RemapRelative_Validity) // relative-map remap must match absolute-map remap built from the same displacements
+{ // NOTE(review): no Stream argument is passed below, so presumably only the default-stream dispatch path is exercised; confirm and consider a cv::cuda::Stream variant
+    cv::cuda::GpuMat gDstAbsolute;
+    cv::cuda::remap(gSrc, gDstAbsolute, gMapAbsoluteX32F, gMapAbsoluteY32F, interpolation, borderType); // reference: absolute coordinates, no flag
+    cv::cuda::GpuMat gDstRelative;
+    cv::cuda::remap(gSrc, gDstRelative, gMapRelativeX32F, gMapRelativeY32F, interpolation | WARP_RELATIVE_MAP, borderType); // under test: displacements plus WARP_RELATIVE_MAP
+
+    cv::Mat dstAbsolute;
+    gDstAbsolute.download(dstAbsolute);
+    cv::Mat dstRelative;
+    gDstRelative.download(dstRelative);
+
+    EXPECT_MAT_NEAR(dstAbsolute, dstRelative, (dstAbsolute.depth() == CV_32F) ? 1e-3 : 1.0); // tolerance: 1e-3 for float images, 1.0 for all other depths
+}
+
+INSTANTIATE_TEST_CASE_P(CUDA_RemapRelative, RemapRelative, testing::Combine( // full cross product of devices, types, interpolations and border modes
+        ALL_DEVICES,
+        testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                        MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                        MatType(CV_16SC1), MatType(CV_16SC3), MatType(CV_16SC4),
+                        MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), // 1-, 3- and 4-channel variants of 8U, 16U, 16S and 32F
+        testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+        testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT)))); // NOTE(review): the header docs also list BORDER_REFLECT and BORDER_WRAP as supported; they are not covered here
+
 }} // namespace
 #endif // HAVE_CUDA