fixed gpu::filter2D border interpolation for CV_32FC1 type

added additional tests for gpu filters; fixed gpu features2D tests
13 years ago · 059cef57e6
parent c1a6cb6221
commit 059cef57e6
16 changed files with 1509 additions and 1294 deletions
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@ -1661,7 +1661,7 @@ public:
    };
    //! Constructor
-    explicit ORB_GPU(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 3, int edgeThreshold = 31,
+    explicit ORB_GPU(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
                     int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);
    //! Compute the ORB features on an image
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@ -46,16 +46,16 @@
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/border_interpolate.hpp"
-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
-    namespace imgproc 
+    namespace imgproc
    {
        /////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
        texture<uchar4, 2> tex_meanshift;
-        __device__ short2 do_mean_shift(int x0, int y0, unsigned char* out, 
+        __device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
-                                        size_t out_step, int cols, int rows, 
+                                        size_t out_step, int cols, int rows,
                                        int sp, int sr, int maxIter, float eps)
        {
            int isr2 = sr*sr;
@ -78,7 +78,7 @@ namespace cv { namespace gpu { namespace device
                {
                    int rowCount = 0;
                    for( int x = minx; x <= maxx; x++ )
-                    {                    
+                    {
                        uchar4 t = tex2D( tex_meanshift, x, y );
                        int norm2 = (t.x - c.x) * (t.x - c.x) + (t.y - c.y) * (t.y - c.y) + (t.z - c.z) * (t.z - c.z);
@ -128,16 +128,16 @@ namespace cv { namespace gpu { namespace device
                do_mean_shift(x0, y0, out, out_step, cols, rows, sp, sr, maxIter, eps);
        }
-        __global__ void meanshiftproc_kernel(unsigned char* outr, size_t outrstep, 
+        __global__ void meanshiftproc_kernel(unsigned char* outr, size_t outrstep,
-                                             unsigned char* outsp, size_t outspstep, 
+                                             unsigned char* outsp, size_t outspstep,
-                                             int cols, int rows, 
+                                             int cols, int rows,
                                             int sp, int sr, int maxIter, float eps)
        {
            int x0 = blockIdx.x * blockDim.x + threadIdx.x;
            int y0 = blockIdx.y * blockDim.y + threadIdx.y;
            if( x0 < cols && y0 < rows )
-            {            
+            {
                int basesp = (blockIdx.y * blockDim.y + threadIdx.y) * outspstep + (blockIdx.x * blockDim.x + threadIdx.x) * 2 * sizeof(short);
                *(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
            }
@ -159,10 +159,10 @@ namespace cv { namespace gpu { namespace device
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
-            //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );        
+            //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
        }
-        void meanShiftProc_gpu(const DevMem2Db& src, DevMem2Db dstr, DevMem2Db dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream) 
+        void meanShiftProc_gpu(const DevMem2Db& src, DevMem2Db dstr, DevMem2Db dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
        {
            dim3 grid(1, 1, 1);
            dim3 threads(32, 8, 1);
@ -178,14 +178,14 @@ namespace cv { namespace gpu { namespace device
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
-            //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );        
+            //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
        }
        /////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
        template <typename T>
        __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
-        {        
+        {
            unsigned int H = ((ndisp-d) * 240)/ndisp;
            unsigned int hi = (H/60) % 6;
@ -195,7 +195,7 @@ namespace cv { namespace gpu { namespace device
            float t = V * (1 - (1 - f) * S);
            float3 res;
-            
+
            if (hi == 0) //R = V,	G = t,	B = p
            {
                res.x = p;
@ -208,15 +208,15 @@ namespace cv { namespace gpu { namespace device
                res.x = p;
                res.y = V;
                res.z = q;
-            }        
+            }
-            
+
            if (hi == 2) // R = p,	G = V,	B = t
            {
                res.x = t;
                res.y = V;
                res.z = p;
            }
-                
+
            if (hi == 3) // R = p,	G = q,	B = V
            {
                res.x = V;
@ -242,15 +242,15 @@ namespace cv { namespace gpu { namespace device
            const unsigned int r = (unsigned int)(::max(0.f, ::min(res.z, 1.f)) * 255.f);
            const unsigned int a = 255U;
-            return (a << 24) + (r << 16) + (g << 8) + b;    
+            return (a << 24) + (r << 16) + (g << 8) + b;
-        } 
+        }
        __global__ void drawColorDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp)
        {
            const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-            if(x < width && y < height) 
+            if(x < width && y < height)
            {
                uchar4 d4 = *(uchar4*)(disp + y * disp_step + x);
@ -259,7 +259,7 @@ namespace cv { namespace gpu { namespace device
                res.y = cvtPixel(d4.y, ndisp);
                res.z = cvtPixel(d4.z, ndisp);
                res.w = cvtPixel(d4.w, ndisp);
-                        
+
                uint4* line = (uint4*)(out_image + y * out_step);
                line[x >> 2] = res;
            }
@ -270,12 +270,12 @@ namespace cv { namespace gpu { namespace device
            const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-            if(x < width && y < height) 
+            if(x < width && y < height)
            {
                short2 d2 = *(short2*)(disp + y * disp_step + x);
                uint2 res;
-                res.x = cvtPixel(d2.x, ndisp);            
+                res.x = cvtPixel(d2.x, ndisp);
                res.y = cvtPixel(d2.y, ndisp);
                uint2* line = (uint2*)(out_image + y * out_step);
@ -290,12 +290,12 @@ namespace cv { namespace gpu { namespace device
            dim3 grid(1, 1, 1);
            grid.x = divUp(src.cols, threads.x << 2);
            grid.y = divUp(src.rows, threads.y);
-             
+
            drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step, dst.data, dst.step, src.cols, src.rows, ndisp);
            cudaSafeCall( cudaGetLastError() );
            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() ); 
+                cudaSafeCall( cudaDeviceSynchronize() );
        }
        void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream)
@ -304,10 +304,10 @@ namespace cv { namespace gpu { namespace device
            dim3 grid(1, 1, 1);
            grid.x = divUp(src.cols, threads.x << 1);
            grid.y = divUp(src.rows, threads.y);
-             
+
            drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(short), dst.data, dst.step, src.cols, src.rows, ndisp);
            cudaSafeCall( cudaGetLastError() );
-            
+
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
@ -318,7 +318,7 @@ namespace cv { namespace gpu { namespace device
        template <typename T>
        __global__ void reprojectImageTo3D(const T* disp, size_t disp_step, float* xyzw, size_t xyzw_step, int rows, int cols)
-        {        
+        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;
@ -328,7 +328,7 @@ namespace cv { namespace gpu { namespace device
                float qx = cq[1] * y + cq[3], qy = cq[5] * y + cq[7];
                float qz = cq[9] * y + cq[11], qw = cq[13] * y + cq[15];
-                qx += x * cq[0]; 
+                qx += x * cq[0];
                qy += x * cq[4];
                qz += x * cq[8];
                qw += x * cq[12];
@ -457,7 +457,7 @@ namespace cv { namespace gpu { namespace device
            bindTexture(&harrisDxTex, Dx);
            bindTexture(&harrisDyTex, Dy);
-            switch (border_type) 
+            switch (border_type)
            {
            case BORDER_REFLECT101_GPU:
                cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst, BrdRowReflect101<void>(Dx.cols), BrdColReflect101<void>(Dx.rows));
@ -565,7 +565,7 @@ namespace cv { namespace gpu { namespace device
        {
            dim3 block(32, 8);
            dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y));
-            
+
            bindTexture(&minEigenValDxTex, Dx);
            bindTexture(&minEigenValDyTex, Dy);
@ -630,10 +630,10 @@ namespace cv { namespace gpu { namespace device
        __global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c)
        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;    
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;    
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-            if (x < c.cols && y < c.rows) 
+            if (x < c.cols && y < c.rows)
            {
                c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
            }
@ -658,10 +658,10 @@ namespace cv { namespace gpu { namespace device
        __global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c)
        {
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;    
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;    
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-            if (x < c.cols && y < c.rows) 
+            if (x < c.cols && y < c.rows)
            {
                c.ptr(y)[x] = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
            }
@ -689,7 +689,7 @@ namespace cv { namespace gpu { namespace device
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-            if (x < c.cols && y < c.rows) 
+            if (x < c.cols && y < c.rows)
            {
                cufftComplex v = cuCmulf(a.ptr(y)[x], b.ptr(y)[x]);
                c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
@ -718,7 +718,7 @@ namespace cv { namespace gpu { namespace device
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-            if (x < c.cols && y < c.rows) 
+            if (x < c.cols && y < c.rows)
            {
                cufftComplex v = cuCmulf(a.ptr(y)[x], cuConjf(b.ptr(y)[x]));
                c.ptr(y)[x] = make_cuFloatComplex(cuCrealf(v) * scale, cuCimagf(v) * scale);
@ -736,7 +736,7 @@ namespace cv { namespace gpu { namespace device
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
-        }    
+        }
        //////////////////////////////////////////////////////////////////////////
        // buildWarpMaps
@ -842,7 +842,7 @@ namespace cv { namespace gpu { namespace device
        void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
-                                const float k_rinv[9], const float r_kinv[9], const float t[3], 
+                                const float k_rinv[9], const float r_kinv[9], const float t[3],
                                float scale, cudaStream_t stream)
        {
            cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
@ -911,27 +911,28 @@ namespace cv { namespace gpu { namespace device
        __constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE];
-        texture<float, cudaTextureType2D, cudaReadModeElementType> filter2DTex(0, cudaFilterModePoint, cudaAddressModeBorder);
+        texture<float, cudaTextureType2D, cudaReadModeElementType> filter2DTex(0, cudaFilterModePoint, cudaAddressModeClamp);
-        __global__ void filter2D(int ofsX, int ofsY, DevMem2Df dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY)
+        __global__ void filter2D(int ofsX, int ofsY, PtrStepf dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY, const BrdReflect101<float> brd)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-            if (x >= dst.cols || y >= dst.rows)
+            if (x > brd.last_col || y > brd.last_row)
                return;
            float res = 0;
            const int baseX = ofsX + x - anchorX;
            const int baseY = ofsY + y - anchorY;
            int kInd = 0;
            for (int i = 0; i < kHeight; ++i)
            {
                for (int j = 0; j < kWidth; ++j)
-                    res += tex2D(filter2DTex, baseX + j, baseY + i) * c_filter2DKernel[kInd++];
+                {
                    const int srcX = ofsX + brd.idx_col(x - anchorX + j);
                    const int srcY = ofsY + brd.idx_row(y - anchorY + i);
                    res += tex2D(filter2DTex, srcX, srcY) * c_filter2DKernel[kInd++];
                }
            }
            dst.ptr(y)[x] = res;
@ -946,7 +947,9 @@ namespace cv { namespace gpu { namespace device
            bindTexture(&filter2DTex, src);
-            filter2D<<<grid, block, 0, stream>>>(ofsX, ofsY, dst, kWidth, kHeight, anchorX, anchorY);
+            BrdReflect101<float> brd(dst.rows, dst.cols);
            filter2D<<<grid, block, 0, stream>>>(ofsX, ofsY, dst, kWidth, kHeight, anchorX, anchorY, brd);
            cudaSafeCall(cudaGetLastError());
            if (stream == 0)
--- a/modules/gpu/src/filtering.cpp
+++ b/modules/gpu/src/filtering.cpp
@ -119,7 +119,7 @@ namespace
    {
        int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1;
        if (nDivisor) *nDivisor = scale;
-        
+
        Mat temp(kernel.size(), type);
        kernel.convertTo(temp, type, scale);
        Mat cont_krnl = temp.reshape(1, 1);
@ -134,7 +134,7 @@ namespace
        }
        gpu_krnl.upload(cont_krnl);
-    } 
+    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@ -144,7 +144,7 @@ namespace
 {
    struct Filter2DEngine_GPU : public FilterEngine_GPU
    {
-        Filter2DEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int srcType_, int dstType_) : 
+        Filter2DEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int srcType_, int dstType_) :
            filter2D(filter2D_), srcType(srcType_), dstType(dstType_)
        {}
@ -189,9 +189,9 @@ namespace
 {
    struct SeparableFilterEngine_GPU : public FilterEngine_GPU
    {
-        SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_, const Ptr<BaseColumnFilter_GPU>& columnFilter_, 
+        SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_, const Ptr<BaseColumnFilter_GPU>& columnFilter_,
                                  int srcType_, int bufType_, int dstType_) :
-            rowFilter(rowFilter_), columnFilter(columnFilter_), 
+            rowFilter(rowFilter_), columnFilter(columnFilter_),
            srcType(srcType_), bufType(bufType_), dstType(dstType_)
        {
            ksize = Size(rowFilter->ksize, columnFilter->ksize);
@ -199,11 +199,11 @@ namespace
            pbuf = &buf;
        }
-        
+
-        SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_, const Ptr<BaseColumnFilter_GPU>& columnFilter_, 
+        SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_, const Ptr<BaseColumnFilter_GPU>& columnFilter_,
                                  int srcType_, int bufType_, int dstType_,
                                  GpuMat& buf_) :
-            rowFilter(rowFilter_), columnFilter(columnFilter_), 
+            rowFilter(rowFilter_), columnFilter(columnFilter_),
            srcType(srcType_), bufType(bufType_), dstType(dstType_)
        {
            ksize = Size(rowFilter->ksize, columnFilter->ksize);
@ -235,7 +235,7 @@ namespace
            GpuMat srcROI = src(roi);
            GpuMat dstROI = dst(roi);
            GpuMat bufROI = (*pbuf)(roi);
-            
+
            (*rowFilter)(srcROI, bufROI, stream);
            (*columnFilter)(bufROI, dstROI, stream);
        }
@ -253,13 +253,13 @@ namespace
    };
 }
-Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter, 
+Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
    const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType)
 {
    return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter, srcType, bufType, dstType));
 }
-Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter, 
+Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
    const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType, GpuMat& buf)
 {
    return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter, srcType, bufType, dstType, buf));
@ -284,7 +284,7 @@ namespace
            NppStreamHandler h(stream);
-            nppSafeCall( nppiSumWindowRow_8u32f_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), 
+            nppSafeCall( nppiSumWindowRow_8u32f_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
                dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ksize, anchor) );
            if (stream == 0)
@ -318,7 +318,7 @@ namespace
            NppStreamHandler h(stream);
-            nppSafeCall( nppiSumWindowColumn_8u32f_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), 
+            nppSafeCall( nppiSumWindowColumn_8u32f_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
                dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ksize, anchor) );
            if (stream == 0)
@ -341,7 +341,7 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int sumType, int dstTy
 namespace
 {
-    typedef NppStatus (*nppFilterBox_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, 
+    typedef NppStatus (*nppFilterBox_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI,
        NppiSize oMaskSize, NppiPoint oAnchor);
    struct NPPBoxFilter : public BaseFilter_GPU
@ -363,8 +363,8 @@ namespace
            cudaStream_t stream = StreamAccessor::getStream(s);
            NppStreamHandler h(stream);
-            
+
-            nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), 
+            nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step),
                dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, oKernelSize, oAnchor) );
            if (stream == 0)
@ -379,7 +379,7 @@ Ptr<BaseFilter_GPU> cv::gpu::getBoxFilter_GPU(int srcType, int dstType, const Si
 {
    static const nppFilterBox_t nppFilterBox_callers[] = {0, nppiFilterBox_8u_C1R, 0, 0, nppiFilterBox_8u_C4R};
-    CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); 
+    CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);
    normalizeAnchor(anchor, ksize);
@ -413,7 +413,7 @@ namespace
    struct NPPMorphFilter : public BaseFilter_GPU
    {
-        NPPMorphFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, nppMorfFilter_t func_) : 
+        NPPMorphFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, nppMorfFilter_t func_) :
            BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {}
        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
@ -432,7 +432,7 @@ namespace
            NppStreamHandler h(stream);
-            nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), 
+            nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step),
                dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, kernel.ptr<Npp8u>(), oKernelSize, oAnchor) );
            if (stream == 0)
@ -446,19 +446,19 @@ namespace
 Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize, Point anchor)
 {
-    static const nppMorfFilter_t nppMorfFilter_callers[2][5] = 
+    static const nppMorfFilter_t nppMorfFilter_callers[2][5] =
    {
        {0, nppiErode_8u_C1R, 0, 0, nppiErode_8u_C4R },
        {0, nppiDilate_8u_C1R, 0, 0, nppiDilate_8u_C4R }
    };
- 
+
-    CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE);   
+    CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE);
-    CV_Assert(type == CV_8UC1 || type == CV_8UC4); 
+    CV_Assert(type == CV_8UC1 || type == CV_8UC4);
-        
+
    GpuMat gpu_krnl;
    normalizeKernel(kernel, gpu_krnl);
    normalizeAnchor(anchor, ksize);
-    
+
    return Ptr<BaseFilter_GPU>(new NPPMorphFilter(ksize, anchor, gpu_krnl, nppMorfFilter_callers[op][CV_MAT_CN(type)]));
 }
@ -466,13 +466,13 @@ namespace
 {
    struct MorphologyFilterEngine_GPU : public FilterEngine_GPU
    {
-        MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int type_, int iters_) : 
+        MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int type_, int iters_) :
            filter2D(filter2D_), type(type_), iters(iters_)
        {
            pbuf = &buf;
        }
-        MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int type_, int iters_, GpuMat& buf_) : 
+        MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int type_, int iters_, GpuMat& buf_) :
            filter2D(filter2D_), type(type_), iters(iters_)
        {
            pbuf = &buf_;
@ -576,7 +576,7 @@ namespace
        else if (iterations > 1 && countNonZero(_kernel) == _kernel.rows * _kernel.cols)
        {
            anchor = Point(anchor.x * iterations, anchor.y * iterations);
-            kernel = getStructuringElement(MORPH_RECT, Size(ksize.width + iterations * (ksize.width - 1), 
+            kernel = getStructuringElement(MORPH_RECT, Size(ksize.width + iterations * (ksize.width - 1),
                ksize.height + iterations * (ksize.height - 1)), anchor);
            iterations = 1;
        }
@ -659,7 +659,7 @@ void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& ke
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Linear Filter
-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
    namespace imgproc
    {
@ -669,12 +669,12 @@ namespace cv { namespace gpu { namespace device
 namespace
 {
-    typedef NppStatus (*nppFilter2D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, 
+    typedef NppStatus (*nppFilter2D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI,
        const Npp32s * pKernel, NppiSize oKernelSize, NppiPoint oAnchor, Npp32s nDivisor);
    struct NPPLinearFilter : public BaseFilter_GPU
    {
-        NPPLinearFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter2D_t func_) : 
+        NPPLinearFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter2D_t func_) :
            BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}
        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
@ -692,8 +692,8 @@ namespace
            cudaStream_t stream = StreamAccessor::getStream(s);
            NppStreamHandler h(stream);
-                                  
+
-            nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, 
+            nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
                kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor) );
            if (stream == 0)
@ -707,9 +707,9 @@ namespace
    struct GpuLinearFilter : public BaseFilter_GPU
    {
-        GpuLinearFilter(Size ksize_, Point anchor_, const GpuMat& kernel_) : 
+        GpuLinearFilter(Size ksize_, Point anchor_, const GpuMat& kernel_) :
            BaseFilter_GPU(ksize_, anchor_), kernel(kernel_) {}
-            
+
        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
        {
            using namespace cv::gpu::device::imgproc;
@ -745,7 +745,7 @@ Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const
    else
    {
        static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R};
-    
+
        GpuMat gpu_krnl;
        int nDivisor;
        normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);
@ -753,8 +753,8 @@ Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const
        normalizeAnchor(anchor, ksize);
        return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));
-    }    
+    }
-}    
+}
 Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Point& anchor)
 {
@ -780,7 +780,7 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Separable Linear Filter
-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
    namespace row_filter
    {
@ -797,14 +797,14 @@ namespace cv { namespace gpu { namespace device
 namespace
 {
-    typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI, 
+    typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI,
        const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);
    typedef void (*gpuFilter1D_t)(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
    struct NppLinearRowFilter : public BaseRowFilter_GPU
    {
-        NppLinearRowFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) : 
+        NppLinearRowFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) :
            BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}
        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
@ -817,7 +817,7 @@ namespace
            NppStreamHandler h(stream);
-            nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, 
+            nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
                kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );
            if (stream == 0)
@ -831,7 +831,7 @@ namespace
    struct GpuLinearRowFilter : public BaseRowFilter_GPU
    {
-        GpuLinearRowFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_, int brd_type_) : 
+        GpuLinearRowFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_, int brd_type_) :
            BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {}
        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
@ -852,7 +852,7 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
    using namespace ::cv::gpu::device::row_filter;
    static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};
-    
+
    if ((bufType == srcType) && (srcType == CV_8UC1 || srcType == CV_8UC4))
    {
        CV_Assert(borderType == BORDER_CONSTANT);
@ -867,7 +867,7 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
        return Ptr<BaseRowFilter_GPU>(new NppLinearRowFilter(ksize, anchor, gpu_row_krnl, nDivisor,
            nppFilter1D_callers[CV_MAT_CN(srcType)]));
    }
-    
+
    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
    int gpuBorderType;
    CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
@ -914,7 +914,7 @@ namespace
 {
    struct NppLinearColumnFilter : public BaseColumnFilter_GPU
    {
-        NppLinearColumnFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) : 
+        NppLinearColumnFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) :
            BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}
        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
@ -927,7 +927,7 @@ namespace
            NppStreamHandler h(stream);
-            nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, 
+            nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
                kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );
            if (stream == 0)
@ -941,7 +941,7 @@ namespace
    struct GpuLinearColumnFilter : public BaseColumnFilter_GPU
    {
-        GpuLinearColumnFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_, int brd_type_) : 
+        GpuLinearColumnFilter(int ksize_, int anchor_, const Mat& kernel_, gpuFilter1D_t func_, int brd_type_) :
            BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {}
        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
@ -963,7 +963,7 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
    using namespace ::cv::gpu::device::column_filter;
    static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R};
-    
+
    if ((bufType == dstType) && (bufType == CV_8UC1 || bufType == CV_8UC4))
    {
        CV_Assert(borderType == BORDER_CONSTANT);
@ -975,14 +975,14 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
        int ksize = gpu_col_krnl.cols;
        normalizeAnchor(anchor, ksize);
-        return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, gpu_col_krnl, nDivisor, 
+        return Ptr<BaseColumnFilter_GPU>(new NppLinearColumnFilter(ksize, anchor, gpu_col_krnl, nDivisor,
            nppFilter1D_callers[CV_MAT_CN(bufType)]));
    }
-    
+
    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);
    int gpuBorderType;
    CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
-   
+
    CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC4 || dstType == CV_16SC3 || dstType == CV_32SC1 || dstType == CV_32FC1);
    CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(dstType) == CV_MAT_CN(bufType));
@ -1021,7 +1021,7 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
    return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, cont_krnl, func, gpuBorderType));
 }
-Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel, 
+Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel,
    const Point& anchor, int rowBorderType, int columnBorderType)
 {
    if (columnBorderType < 0)
@ -1037,7 +1037,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int
    return createSeparableFilter_GPU(rowFilter, columnFilter, srcType, bufType, dstType);
 }
-Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel, GpuMat& buf, 
+Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel, GpuMat& buf,
    const Point& anchor, int rowBorderType, int columnBorderType)
 {
    if (columnBorderType < 0)
@ -1053,7 +1053,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int srcType, int
    return createSeparableFilter_GPU(rowFilter, columnFilter, srcType, bufType, dstType, buf);
 }
-void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, 
+void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
                          Point anchor, int rowBorderType, int columnBorderType)
 {
    if( ddepth < 0 )
@ -1065,7 +1065,7 @@ void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat&
    f->apply(src, dst, Rect(0, 0, src.cols, src.rows));
 }
-void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf, 
+void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf,
                          Point anchor, int rowBorderType, int columnBorderType,
                          Stream& stream)
 {
@ -1115,7 +1115,7 @@ void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
        else
            ky *= scale;
    }
-    
+
    sepFilter2D(src, dst, ddepth, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType, stream);
 }
@ -1155,7 +1155,7 @@ void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, d
    Mat kernel(3, 3, CV_32S, (void*)K[ksize == 3]);
    if (scale != 1)
        kernel *= scale;
-    
+
    filter2D(src, dst, ddepth, kernel, Point(-1,-1), stream);
 }
@ -1163,7 +1163,7 @@ void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, d
 // Gaussian Filter
 Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType)
-{        
+{
    int depth = CV_MAT_DEPTH(type);
    if (sigma2 <= 0)
@ -1191,7 +1191,7 @@ Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, do
 }
 Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int type, Size ksize, GpuMat& buf, double sigma1, double sigma2, int rowBorderType, int columnBorderType)
-{        
+{
    int depth = CV_MAT_DEPTH(type);
    if (sigma2 <= 0)
@ -1227,7 +1227,7 @@ void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double si
    }
    dst.create(src.size(), src.type());
-    
+
    Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, rowBorderType, columnBorderType);
    f->apply(src, dst, Rect(0, 0, src.cols, src.rows));
 }
@ -1241,7 +1241,7 @@ void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& b
    }
    dst.create(src.size(), src.type());
-    
+
    Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, buf, sigma1, sigma2, rowBorderType, columnBorderType);
    f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream);
 }
@ -1251,7 +1251,7 @@ void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& b
 namespace
 {
-    typedef NppStatus (*nppFilterRank_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, 
+    typedef NppStatus (*nppFilterRank_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI,
        NppiSize oMaskSize, NppiPoint oAnchor);
    struct NPPRankFilter : public BaseFilter_GPU
@ -1273,7 +1273,7 @@ namespace
            cudaStream_t stream = StreamAccessor::getStream(s);
            NppStreamHandler h(stream);
-            
+
            nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, oKernelSize, oAnchor) );
            if (stream == 0)
@ -1288,7 +1288,7 @@ Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int srcType, int dstType, const Si
 {
    static const nppFilterRank_t nppFilterRank_callers[] = {0, nppiFilterMax_8u_C1R, 0, 0, nppiFilterMax_8u_C4R};
-    CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); 
+    CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);
    normalizeAnchor(anchor, ksize);
@ -1299,7 +1299,7 @@ Ptr<BaseFilter_GPU> cv::gpu::getMinFilter_GPU(int srcType, int dstType, const Si
 {
    static const nppFilterRank_t nppFilterRank_callers[] = {0, nppiFilterMin_8u_C1R, 0, 0, nppiFilterMin_8u_C4R};
-    CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); 
+    CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);
    normalizeAnchor(anchor, ksize);
--- a/modules/gpu/src/surf.cpp
+++ b/modules/gpu/src/surf.cpp
@ -63,7 +63,7 @@ void cv::gpu::SURF_GPU::releaseMemory() { throw_nogpu(); }
 #else /* !defined (HAVE_CUDA) */
-namespace cv { namespace gpu { namespace device 
+namespace cv { namespace gpu { namespace device
 {
    namespace surf
    {
@ -79,13 +79,13 @@ namespace cv { namespace gpu { namespace device
        void icvFindMaximaInLayer_gpu(const PtrStepf& det, const PtrStepf& trace, int4* maxPosBuffer, unsigned int* maxCounter,
            int img_rows, int img_cols, int octave, bool use_mask, int nLayers);
-        void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter, 
+        void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
-            float* featureX, float* featureY, int* featureLaplacian, float* featureSize, float* featureHessian, 
+            float* featureX, float* featureY, int* featureLaplacian, float* featureSize, float* featureHessian,
            unsigned int* featureCounter);
        void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures);
-        void compute_descriptors_gpu(const DevMem2Df& descriptors, 
+        void compute_descriptors_gpu(const DevMem2Df& descriptors,
            const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures);
    }
 }}}
@ -108,7 +108,7 @@ namespace
        return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
    }
-    
+
    class SURF_GPU_Invoker
    {
    public:
@ -121,11 +121,11 @@ namespace
            CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
            CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);
            CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
-                
+
            const int min_size = calcSize(surf_.nOctaves - 1, 0);
            CV_Assert(img_rows - min_size >= 0);
            CV_Assert(img_cols - min_size >= 0);
-            
+
            const int layer_rows = img_rows >> (surf_.nOctaves - 1);
            const int layer_cols = img_cols >> (surf_.nOctaves - 1);
            const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1;
@ -159,7 +159,7 @@ namespace
        {
            ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.det);
            ensureSizeIsEnough(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1, surf_.trace);
-            
+
            ensureSizeIsEnough(1, maxCandidates, CV_32SC4, surf_.maxPosBuffer);
            ensureSizeIsEnough(SURF_GPU::SF_FEATURE_STRIDE, maxFeatures, CV_32FC1, keypoints);
            keypoints.setTo(Scalar::all(0));
@ -182,7 +182,7 @@ namespace
                if (maxCounter > 0)
                {
-                    icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer.ptr<int4>(), maxCounter, 
+                    icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer.ptr<int4>(), maxCounter,
                        keypoints.ptr<float>(SURF_GPU::SF_X), keypoints.ptr<float>(SURF_GPU::SF_Y),
                        keypoints.ptr<int>(SURF_GPU::SF_LAPLACIAN), keypoints.ptr<float>(SURF_GPU::SF_SIZE),
                        keypoints.ptr<float>(SURF_GPU::SF_HESSIAN), counters.ptr<unsigned int>());
@ -238,7 +238,7 @@ namespace
 cv::gpu::SURF_GPU::SURF_GPU()
 {
    hessianThreshold = 100;
-    extended = 1;
+    extended = true;
    nOctaves = 4;
    nOctaveLayers = 2;
    keypointsRatio = 0.01f;
@ -323,9 +323,9 @@ void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector<Key
    else
    {
        CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == SF_FEATURE_STRIDE);
-        
+
        Mat keypointsCPU(keypointsGPU);
-        
+
        keypoints.resize(nFeatures);
        float* kp_x = keypointsCPU.ptr<float>(SF_X);
@ -373,13 +373,13 @@ void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, GpuMat
    }
 }
-void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors, 
+void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
                                   bool useProvidedKeypoints)
 {
    if (!img.empty())
    {
        SURF_GPU_Invoker surf(*this, img, mask);
-    
+
        if (!useProvidedKeypoints)
            surf.detectKeypoints(keypoints);
        else if (!upright)
@ -400,20 +400,20 @@ void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, vector
    downloadKeypoints(keypointsGPU, keypoints);
 }
-void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, vector<KeyPoint>& keypoints, 
+void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, vector<KeyPoint>& keypoints,
    GpuMat& descriptors, bool useProvidedKeypoints)
 {
    GpuMat keypointsGPU;
    if (useProvidedKeypoints)
-        uploadKeypoints(keypoints, keypointsGPU);    
+        uploadKeypoints(keypoints, keypointsGPU);
    (*this)(img, mask, keypointsGPU, descriptors, useProvidedKeypoints);
    downloadKeypoints(keypointsGPU, keypoints);
 }
-void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, vector<KeyPoint>& keypoints, 
+void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, vector<KeyPoint>& keypoints,
    vector<float>& descriptors, bool useProvidedKeypoints)
 {
    GpuMat descriptorsGPU;
@ -423,9 +423,9 @@ void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, vector
    downloadDescriptors(descriptorsGPU, descriptors);
 }
-void cv::gpu::SURF_GPU::releaseMemory() 
+void cv::gpu::SURF_GPU::releaseMemory()
 {
-    sum.release(); 
+    sum.release();
    mask1.release();
    maskSum.release();
    intBuffer.release();
--- a/modules/gpu/test/test_calib3d.cpp
+++ b/modules/gpu/test/test_calib3d.cpp
@ -41,20 +41,13 @@
 #include "precomp.hpp"
-#ifdef HAVE_CUDA
+namespace {
 using namespace cvtest;
 using namespace testing;
 //////////////////////////////////////////////////////////////////////////
-// BlockMatching
+// StereoBM
-struct StereoBlockMatching : TestWithParam<cv::gpu::DeviceInfo>
+struct StereoBM : testing::TestWithParam<cv::gpu::DeviceInfo>
 {
    cv::Mat img_l;
    cv::Mat img_r;
    cv::Mat img_template;
    cv::gpu::DeviceInfo devInfo;
    virtual void SetUp()
@ -62,44 +55,34 @@ struct StereoBlockMatching : TestWithParam<cv::gpu::DeviceInfo>
        devInfo = GetParam();
        cv::gpu::setDevice(devInfo.deviceID());
        img_l = readImage("stereobm/aloe-L.png", CV_LOAD_IMAGE_GRAYSCALE);
        img_r = readImage("stereobm/aloe-R.png", CV_LOAD_IMAGE_GRAYSCALE);
        img_template = readImage("stereobm/aloe-disp.png", CV_LOAD_IMAGE_GRAYSCALE);
        ASSERT_FALSE(img_l.empty());
        ASSERT_FALSE(img_r.empty());
        ASSERT_FALSE(img_template.empty());
    }
 };
-TEST_P(StereoBlockMatching, Regression)
+TEST_P(StereoBM, Regression)
 {
-    cv::Mat disp;
+    cv::Mat left_image  = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
-
+    cv::Mat right_image = readImage("stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
-    cv::gpu::GpuMat dev_disp;
+    cv::Mat disp_gold   = readImage("stereobm/aloe-disp.png", cv::IMREAD_GRAYSCALE);
    cv::gpu::StereoBM_GPU bm(0, 128, 19);
-    bm(cv::gpu::GpuMat(img_l), cv::gpu::GpuMat(img_r), dev_disp);
+    ASSERT_FALSE(left_image.empty());
    ASSERT_FALSE(right_image.empty());
    ASSERT_FALSE(disp_gold.empty());
-    dev_disp.download(disp);
+    cv::gpu::StereoBM_GPU bm(0, 128, 19);
    cv::gpu::GpuMat disp;
-    disp.convertTo(disp, img_template.type());
+    bm(loadMat(left_image), loadMat(right_image), disp);
-    EXPECT_MAT_NEAR(img_template, disp, 0.0);
+    EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
 }
-INSTANTIATE_TEST_CASE_P(Calib3D, StereoBlockMatching, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoBM, ALL_DEVICES);
 //////////////////////////////////////////////////////////////////////////
-// BeliefPropagation
+// StereoBeliefPropagation
-struct StereoBeliefPropagation : TestWithParam<cv::gpu::DeviceInfo>
+struct StereoBeliefPropagation : testing::TestWithParam<cv::gpu::DeviceInfo>
 {
    cv::Mat img_l;
    cv::Mat img_r;
    cv::Mat img_template;
    cv::gpu::DeviceInfo devInfo;
    virtual void SetUp()
@ -107,44 +90,37 @@ struct StereoBeliefPropagation : TestWithParam<cv::gpu::DeviceInfo>
        devInfo = GetParam();
        cv::gpu::setDevice(devInfo.deviceID());
        img_l = readImage("stereobp/aloe-L.png");
        img_r = readImage("stereobp/aloe-R.png");
        img_template = readImage("stereobp/aloe-disp.png", CV_LOAD_IMAGE_GRAYSCALE);
        ASSERT_FALSE(img_l.empty());
        ASSERT_FALSE(img_r.empty());
        ASSERT_FALSE(img_template.empty());
    }
 };
 TEST_P(StereoBeliefPropagation, Regression)
 {
-    cv::Mat disp;
+    cv::Mat left_image  = readImage("stereobp/aloe-L.png");
    cv::Mat right_image = readImage("stereobp/aloe-R.png");
    cv::Mat disp_gold   = readImage("stereobp/aloe-disp.png", cv::IMREAD_GRAYSCALE);
-    cv::gpu::GpuMat dev_disp;
+    ASSERT_FALSE(left_image.empty());
-    cv::gpu::StereoBeliefPropagation bpm(64, 8, 2, 25, 0.1f, 15, 1, CV_16S);
+    ASSERT_FALSE(right_image.empty());
    ASSERT_FALSE(disp_gold.empty());
-    bpm(cv::gpu::GpuMat(img_l), cv::gpu::GpuMat(img_r), dev_disp);
+    cv::gpu::StereoBeliefPropagation bp(64, 8, 2, 25, 0.1f, 15, 1, CV_16S);
    cv::gpu::GpuMat disp;
-    dev_disp.download(disp);
+    bp(loadMat(left_image), loadMat(right_image), disp);
-    disp.convertTo(disp, img_template.type());
+    cv::Mat h_disp(disp);
    h_disp.convertTo(h_disp, disp_gold.depth());
-    EXPECT_MAT_NEAR(img_template, disp, 0.0);
+    EXPECT_MAT_NEAR(disp_gold, h_disp, 0.0);
 }
-INSTANTIATE_TEST_CASE_P(Calib3D, StereoBeliefPropagation, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoBeliefPropagation, ALL_DEVICES);
 //////////////////////////////////////////////////////////////////////////
-// ConstantSpaceBP
+// StereoConstantSpaceBP
-struct StereoConstantSpaceBP : TestWithParam<cv::gpu::DeviceInfo>
+struct StereoConstantSpaceBP : testing::TestWithParam<cv::gpu::DeviceInfo>
 {
    cv::Mat img_l;
    cv::Mat img_r;
    cv::Mat img_template;
    cv::gpu::DeviceInfo devInfo;
    virtual void SetUp()
@ -152,207 +128,177 @@ struct StereoConstantSpaceBP : TestWithParam<cv::gpu::DeviceInfo>
        devInfo = GetParam();
        cv::gpu::setDevice(devInfo.deviceID());
        img_l = readImage("csstereobp/aloe-L.png");
        img_r = readImage("csstereobp/aloe-R.png");
        if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
            img_template = readImage("csstereobp/aloe-disp.png", CV_LOAD_IMAGE_GRAYSCALE);
        else
            img_template = readImage("csstereobp/aloe-disp_CC1X.png", CV_LOAD_IMAGE_GRAYSCALE);
        ASSERT_FALSE(img_l.empty());
        ASSERT_FALSE(img_r.empty());
        ASSERT_FALSE(img_template.empty());
    }
 };
 TEST_P(StereoConstantSpaceBP, Regression)
 {
-    cv::Mat disp;
+    cv::Mat left_image  = readImage("csstereobp/aloe-L.png");
    cv::Mat right_image = readImage("csstereobp/aloe-R.png");
    cv::Mat disp_gold;
-    cv::gpu::GpuMat dev_disp;
+    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
-    cv::gpu::StereoConstantSpaceBP bpm(128, 16, 4, 4);
+        disp_gold = readImage("csstereobp/aloe-disp.png", cv::IMREAD_GRAYSCALE);
    else
        disp_gold = readImage("csstereobp/aloe-disp_CC1X.png", cv::IMREAD_GRAYSCALE);
-    bpm(cv::gpu::GpuMat(img_l), cv::gpu::GpuMat(img_r), dev_disp);
+    ASSERT_FALSE(left_image.empty());
    ASSERT_FALSE(right_image.empty());
    ASSERT_FALSE(disp_gold.empty());
-    dev_disp.download(disp);
+    cv::gpu::StereoConstantSpaceBP csbp(128, 16, 4, 4);
    cv::gpu::GpuMat disp;
-    disp.convertTo(disp, img_template.type());
+    csbp(loadMat(left_image), loadMat(right_image), disp);
-    EXPECT_MAT_NEAR(img_template, disp, 1.0);
+    cv::Mat h_disp(disp);
    h_disp.convertTo(h_disp, disp_gold.depth());
    EXPECT_MAT_NEAR(disp_gold, h_disp, 1.0);
 }
-INSTANTIATE_TEST_CASE_P(Calib3D, StereoConstantSpaceBP, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoConstantSpaceBP, ALL_DEVICES);
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
-// projectPoints
+// transformPoints
-struct ProjectPoints : TestWithParam<cv::gpu::DeviceInfo>
+struct TransformPoints : testing::TestWithParam<cv::gpu::DeviceInfo>
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Mat src;
    cv::Mat rvec;
    cv::Mat tvec;
    cv::Mat camera_mat;
    std::vector<cv::Point2f> dst_gold;
    virtual void SetUp()
    {
        devInfo = GetParam();
        cv::gpu::setDevice(devInfo.deviceID());
        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
        src = cvtest::randomMat(rng, cv::Size(1000, 1), CV_32FC3, 0, 10, false);
        rvec = cvtest::randomMat(rng, cv::Size(3, 1), CV_32F, 0, 1, false);
        tvec = cvtest::randomMat(rng, cv::Size(3, 1), CV_32F, 0, 1, false);
        camera_mat = cvtest::randomMat(rng, cv::Size(3, 3), CV_32F, 0, 1, false);
        camera_mat.at<float>(0, 1) = 0.f;
        camera_mat.at<float>(1, 0) = 0.f;
        camera_mat.at<float>(2, 0) = 0.f;
        camera_mat.at<float>(2, 1) = 0.f;
        cv::projectPoints(src, rvec, tvec, camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), dst_gold);
    }
 };
-TEST_P(ProjectPoints, Accuracy)
+TEST_P(TransformPoints, Accuracy)
 {
-    cv::Mat dst;
+    cv::Mat src = randomMat(cv::Size(1000, 1), CV_32FC3, 0, 10);
    cv::Mat rvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
    cv::Mat tvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
-    cv::gpu::GpuMat d_dst;
+    cv::gpu::GpuMat dst;
    cv::gpu::transformPoints(loadMat(src), rvec, tvec, dst);
-    cv::gpu::projectPoints(cv::gpu::GpuMat(src), rvec, tvec, camera_mat, cv::Mat(), d_dst);
+    ASSERT_EQ(src.size(), dst.size());
    ASSERT_EQ(src.type(), dst.type());
-    d_dst.download(dst);
+    cv::Mat h_dst(dst);
-    ASSERT_EQ(dst_gold.size(), static_cast<size_t>(dst.cols));
+    cv::Mat rot;
-    ASSERT_EQ(1, dst.rows);
+    cv::Rodrigues(rvec, rot);
    ASSERT_EQ(CV_32FC2, dst.type());
-    for (size_t i = 0; i < dst_gold.size(); ++i)
+    for (int i = 0; i < h_dst.cols; ++i)
    {
-        cv::Point2f res_gold = dst_gold[i];
+        cv::Point3f res = h_dst.at<cv::Point3f>(0, i);
-        cv::Point2f res_actual = dst.at<cv::Point2f>(0, i);
+
-        cv::Point2f err = res_actual - res_gold;
+        cv::Point3f p = src.at<cv::Point3f>(0, i);
        cv::Point3f res_gold(
                rot.at<float>(0, 0) * p.x + rot.at<float>(0, 1) * p.y + rot.at<float>(0, 2) * p.z + tvec.at<float>(0, 0),
                rot.at<float>(1, 0) * p.x + rot.at<float>(1, 1) * p.y + rot.at<float>(1, 2) * p.z + tvec.at<float>(0, 1),
                rot.at<float>(2, 0) * p.x + rot.at<float>(2, 1) * p.y + rot.at<float>(2, 2) * p.z + tvec.at<float>(0, 2));
-        ASSERT_LE(err.dot(err) / res_gold.dot(res_gold), 1e-3f);
+        ASSERT_POINT3_NEAR(res_gold, res, 1e-5);
    }
 }
-INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_Calib3D, TransformPoints, ALL_DEVICES);
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
-// transformPoints
+// ProjectPoints
-struct TransformPoints : TestWithParam<cv::gpu::DeviceInfo>
+struct ProjectPoints : testing::TestWithParam<cv::gpu::DeviceInfo>
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Mat src;
    cv::Mat rvec;
    cv::Mat tvec;
    cv::Mat rot;
    virtual void SetUp()
    {
        devInfo = GetParam();
        cv::gpu::setDevice(devInfo.deviceID());
        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
        src = cvtest::randomMat(rng, cv::Size(1000, 1), CV_32FC3, 0, 10, false);
        rvec = cvtest::randomMat(rng, cv::Size(3, 1), CV_32F, 0, 1, false);
        tvec = cvtest::randomMat(rng, cv::Size(3, 1), CV_32F, 0, 1, false);
        cv::Rodrigues(rvec, rot);
    }
 };
-TEST_P(TransformPoints, Accuracy)
+TEST_P(ProjectPoints, Accuracy)
 {
-    cv::Mat dst;
+    cv::Mat src = randomMat(cv::Size(1000, 1), CV_32FC3, 0, 10);
    cv::Mat rvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
    cv::Mat tvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
    cv::Mat camera_mat = randomMat(cv::Size(3, 3), CV_32F, 0.5, 1);
    camera_mat.at<float>(0, 1) = 0.f;
    camera_mat.at<float>(1, 0) = 0.f;
    camera_mat.at<float>(2, 0) = 0.f;
    camera_mat.at<float>(2, 1) = 0.f;
    cv::gpu::GpuMat dst;
    cv::gpu::projectPoints(loadMat(src), rvec, tvec, camera_mat, cv::Mat(), dst);
-    cv::gpu::GpuMat d_dst;
+    ASSERT_EQ(1, dst.rows);
    ASSERT_EQ(MatType(CV_32FC2), MatType(dst.type()));
-    cv::gpu::transformPoints(cv::gpu::GpuMat(src), rvec, tvec, d_dst);
+    std::vector<cv::Point2f> dst_gold;
    cv::projectPoints(src, rvec, tvec, camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), dst_gold);
-    d_dst.download(dst);
+    ASSERT_EQ(dst_gold.size(), static_cast<size_t>(dst.cols));
-    ASSERT_EQ(src.size(), dst.size());
+    cv::Mat h_dst(dst);
    ASSERT_EQ(src.type(), dst.type());
-    for (int i = 0; i < dst.cols; ++i)
+    for (size_t i = 0; i < dst_gold.size(); ++i)
    {
-        cv::Point3f p = src.at<cv::Point3f>(0, i);
+        cv::Point2f res = h_dst.at<cv::Point2f>(0, i);
-        cv::Point3f res_gold(
+        cv::Point2f res_gold = dst_gold[i];
                rot.at<float>(0, 0) * p.x + rot.at<float>(0, 1) * p.y + rot.at<float>(0, 2) * p.z + tvec.at<float>(0, 0),
                rot.at<float>(1, 0) * p.x + rot.at<float>(1, 1) * p.y + rot.at<float>(1, 2) * p.z + tvec.at<float>(0, 1),
                rot.at<float>(2, 0) * p.x + rot.at<float>(2, 1) * p.y + rot.at<float>(2, 2) * p.z + tvec.at<float>(0, 2));
        cv::Point3f res_actual = dst.at<cv::Point3f>(0, i);
        cv::Point3f err = res_actual - res_gold;
-        ASSERT_LE(err.dot(err) / res_gold.dot(res_gold), 1e-3f);
+        ASSERT_LE(cv::norm(res_gold - res) / cv::norm(res_gold), 1e-3f);
    }
 }
-INSTANTIATE_TEST_CASE_P(Calib3D, TransformPoints, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_Calib3D, ProjectPoints, ALL_DEVICES);
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
-// solvePnPRansac
+// SolvePnPRansac
-struct SolvePnPRansac : TestWithParam<cv::gpu::DeviceInfo>
+struct SolvePnPRansac : testing::TestWithParam<cv::gpu::DeviceInfo>
 {
    static const int num_points = 5000;
    cv::gpu::DeviceInfo devInfo;
    cv::Mat object;
    cv::Mat camera_mat;
    std::vector<cv::Point2f> image_vec;
    cv::Mat rvec_gold;
    cv::Mat tvec_gold;
    virtual void SetUp()
    {
        devInfo = GetParam();
        cv::gpu::setDevice(devInfo.deviceID());
        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
        object = cvtest::randomMat(rng, cv::Size(num_points, 1), CV_32FC3, 0, 100, false);
        camera_mat = cvtest::randomMat(rng, cv::Size(3, 3), CV_32F, 0.5, 1, false);
        camera_mat.at<float>(0, 1) = 0.f;
        camera_mat.at<float>(1, 0) = 0.f;
        camera_mat.at<float>(2, 0) = 0.f;
        camera_mat.at<float>(2, 1) = 0.f;
        rvec_gold = cvtest::randomMat(rng, cv::Size(3, 1), CV_32F, 0, 1, false);
        tvec_gold = cvtest::randomMat(rng, cv::Size(3, 1), CV_32F, 0, 1, false);
        cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), image_vec);
    }
 };
 TEST_P(SolvePnPRansac, Accuracy)
 {
    cv::Mat object = randomMat(cv::Size(5000, 1), CV_32FC3, 0, 100);
    cv::Mat camera_mat = randomMat(cv::Size(3, 3), CV_32F, 0.5, 1);
    camera_mat.at<float>(0, 1) = 0.f;
    camera_mat.at<float>(1, 0) = 0.f;
    camera_mat.at<float>(2, 0) = 0.f;
    camera_mat.at<float>(2, 1) = 0.f;
    std::vector<cv::Point2f> image_vec;
    cv::Mat rvec_gold;
    cv::Mat tvec_gold;
    rvec_gold = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
    tvec_gold = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
    cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), image_vec);
    cv::Mat rvec, tvec;
    std::vector<int> inliers;
    cv::gpu::solvePnPRansac(object, cv::Mat(1, image_vec.size(), CV_32FC2, &image_vec[0]),
                            camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)),
                            rvec, tvec, false, 200, 2.f, 100, &inliers);
-    cv::gpu::solvePnPRansac(object, cv::Mat(1, image_vec.size(), CV_32FC2, &image_vec[0]), camera_mat,
+    ASSERT_LE(cv::norm(rvec - rvec_gold), 1e-3);
-                            cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), rvec, tvec, false, 200, 2.f, 100, &inliers);
+    ASSERT_LE(cv::norm(tvec - tvec_gold), 1e-3);
    ASSERT_LE(cv::norm(rvec - rvec_gold), 1e-3f);
    ASSERT_LE(cv::norm(tvec - tvec_gold), 1e-3f);
 }
-INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_Calib3D, SolvePnPRansac, ALL_DEVICES);
-#endif // HAVE_CUDA
+} // namespace
--- a/modules/gpu/test/test_copy_make_border.cpp
+++ b/modules/gpu/test/test_copy_make_border.cpp
@ -41,9 +41,11 @@
 #include "precomp.hpp"
-#ifdef HAVE_CUDA
+namespace {
-PARAM_TEST_CASE(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, MatType, int, Border, UseRoi)
+IMPLEMENT_PARAM_CLASS(Border, int)
 PARAM_TEST_CASE(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, MatType, Border, BorderType, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -80,11 +82,19 @@ TEST_P(CopyMakeBorder, Accuracy)
 }
 INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CopyMakeBorder, testing::Combine(
-    ALL_DEVICES, 
+    ALL_DEVICES,
    DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(MatType(CV_8UC1),
-    testing::Values(1, 10, 50),
+                    MatType(CV_8UC3),
-    testing::Values(Border(cv::BORDER_REFLECT101), Border(cv::BORDER_REPLICATE), Border(cv::BORDER_CONSTANT), Border(cv::BORDER_REFLECT), Border(cv::BORDER_WRAP)),
+                    MatType(CV_8UC4),
                    MatType(CV_16UC1),
                    MatType(CV_16UC3),
                    MatType(CV_16UC4),
                    MatType(CV_32FC1),
                    MatType(CV_32FC3),
                    MatType(CV_32FC4)),
    testing::Values(Border(1), Border(10), Border(50)),
    ALL_BORDER_TYPES,
    WHOLE_SUBMAT));
-#endif // HAVE_CUDA
+} // namespace
--- a/modules/gpu/test/test_core.cpp
+++ b/modules/gpu/test/test_core.cpp
@ -41,10 +41,12 @@
 #include "precomp.hpp"
 namespace {
 ////////////////////////////////////////////////////////////////////////////////
 // Add_Array
-PARAM_TEST_CASE(Add_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, int, UseRoi)
+PARAM_TEST_CASE(Add_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -90,7 +92,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Array, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    DEPTH_PAIRS,
-    testing::Values(1, 2, 3, 4),
+    ALL_CHANNELS,
    WHOLE_SUBMAT));
 ////////////////////////////////////////////////////////////////////////////////
@ -139,7 +141,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Scalar, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Subtract_Array
-PARAM_TEST_CASE(Subtract_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, int, UseRoi)
+PARAM_TEST_CASE(Subtract_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -185,7 +187,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Array, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    DEPTH_PAIRS,
-    testing::Values(1, 2, 3, 4),
+    ALL_CHANNELS,
    WHOLE_SUBMAT));
 ////////////////////////////////////////////////////////////////////////////////
@ -234,7 +236,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Scalar, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Multiply_Array
-PARAM_TEST_CASE(Multiply_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, int, UseRoi)
+PARAM_TEST_CASE(Multiply_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -279,7 +281,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Array, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    DEPTH_PAIRS,
-    testing::Values(1, 2, 3, 4),
+    ALL_CHANNELS,
    WHOLE_SUBMAT));
 ////////////////////////////////////////////////////////////////////////////////
@ -425,7 +427,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Scalar, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Divide_Array
-PARAM_TEST_CASE(Divide_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, int, UseRoi)
+PARAM_TEST_CASE(Divide_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -470,7 +472,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Array, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    DEPTH_PAIRS,
-    testing::Values(1, 2, 3, 4),
+    ALL_CHANNELS,
    WHOLE_SUBMAT));
 ////////////////////////////////////////////////////////////////////////////////
@ -794,31 +796,28 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Sqr, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Sqrt
-namespace
+template <typename T> void sqrtImpl(const cv::Mat& src, cv::Mat& dst)
 {
-    template <typename T> void sqrtImpl(const cv::Mat& src, cv::Mat& dst)
+    dst.create(src.size(), src.type());
    {
        dst.create(src.size(), src.type());
-        for (int y = 0; y < src.rows; ++y)
+    for (int y = 0; y < src.rows; ++y)
-        {
+    {
-            for (int x = 0; x < src.cols; ++x)
+        for (int x = 0; x < src.cols; ++x)
-                dst.at<T>(y, x) = static_cast<T>(std::sqrt(static_cast<float>(src.at<T>(y, x))));
+            dst.at<T>(y, x) = static_cast<T>(std::sqrt(static_cast<float>(src.at<T>(y, x))));
        }
    }
 }
-    void sqrtGold(const cv::Mat& src, cv::Mat& dst)
+void sqrtGold(const cv::Mat& src, cv::Mat& dst)
-    {
+{
-        typedef void (*func_t)(const cv::Mat& src, cv::Mat& dst);
+    typedef void (*func_t)(const cv::Mat& src, cv::Mat& dst);
-        const func_t funcs[] =
+    const func_t funcs[] =
-        {
+    {
-            sqrtImpl<uchar>, sqrtImpl<schar>, sqrtImpl<ushort>, sqrtImpl<short>,
+        sqrtImpl<uchar>, sqrtImpl<schar>, sqrtImpl<ushort>, sqrtImpl<short>,
-            sqrtImpl<int>, sqrtImpl<float>
+        sqrtImpl<int>, sqrtImpl<float>
-        };
+    };
-        funcs[src.depth()](src, dst);
+    funcs[src.depth()](src, dst);
    }
 }
 PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
@ -864,31 +863,28 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Sqrt, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Log
-namespace
+template <typename T> void logImpl(const cv::Mat& src, cv::Mat& dst)
 {
-    template <typename T> void logImpl(const cv::Mat& src, cv::Mat& dst)
+    dst.create(src.size(), src.type());
    {
        dst.create(src.size(), src.type());
-        for (int y = 0; y < src.rows; ++y)
+    for (int y = 0; y < src.rows; ++y)
-        {
+    {
-            for (int x = 0; x < src.cols; ++x)
+        for (int x = 0; x < src.cols; ++x)
-                dst.at<T>(y, x) = static_cast<T>(std::log(static_cast<float>(src.at<T>(y, x))));
+            dst.at<T>(y, x) = static_cast<T>(std::log(static_cast<float>(src.at<T>(y, x))));
        }
    }
 }
-    void logGold(const cv::Mat& src, cv::Mat& dst)
+void logGold(const cv::Mat& src, cv::Mat& dst)
-    {
+{
-        typedef void (*func_t)(const cv::Mat& src, cv::Mat& dst);
+    typedef void (*func_t)(const cv::Mat& src, cv::Mat& dst);
-        const func_t funcs[] =
+    const func_t funcs[] =
-        {
+    {
-            logImpl<uchar>, logImpl<schar>, logImpl<ushort>, logImpl<short>,
+        logImpl<uchar>, logImpl<schar>, logImpl<ushort>, logImpl<short>,
-            logImpl<int>, logImpl<float>
+        logImpl<int>, logImpl<float>
-        };
+    };
-        funcs[src.depth()](src, dst);
+    funcs[src.depth()](src, dst);
    }
 }
 PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
@ -974,6 +970,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // compare
 CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
 #define ALL_CMP_CODES testing::Values(CmpCode(cv::CMP_EQ), CmpCode(cv::CMP_NE), CmpCode(cv::CMP_GT), CmpCode(cv::CMP_GE), CmpCode(cv::CMP_LT), CmpCode(cv::CMP_LE))
 PARAM_TEST_CASE(Compare, cv::gpu::DeviceInfo, cv::Size, MatDepth, CmpCode, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
@ -1088,7 +1087,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Bitwise_Array, testing::Combine(
 //////////////////////////////////////////////////////////////////////////////
 // Bitwise_Scalar
-PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, int)
+PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -1150,43 +1149,40 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Bitwise_Scalar, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32S)),
-    testing::Values(1, 3, 4)));
+    IMAGE_CHANNELS));
 //////////////////////////////////////////////////////////////////////////////
 // RShift
-namespace
+template <typename T> void rhiftImpl(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
 {
-    template <typename T> void rhiftImpl(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
+    const int cn = src.channels();
    {
        const int cn = src.channels();
-        dst.create(src.size(), src.type());
+    dst.create(src.size(), src.type());
-        for (int y = 0; y < src.rows; ++y)
+    for (int y = 0; y < src.rows; ++y)
    {
        for (int x = 0; x < src.cols; ++x)
        {
-            for (int x = 0; x < src.cols; ++x)
+            for (int c = 0; c < cn; ++c)
-            {
+                dst.at<T>(y, x * cn + c) = src.at<T>(y, x * cn + c) >> val.val[c];
                for (int c = 0; c < cn; ++c)
                    dst.at<T>(y, x * cn + c) = src.at<T>(y, x * cn + c) >> val.val[c];
            }
        }
    }
 }
-    void rhiftGold(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
+void rhiftGold(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
-    {
+{
-        typedef void (*func_t)(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst);
+    typedef void (*func_t)(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst);
-        const func_t funcs[] =
+    const func_t funcs[] =
-        {
+    {
-            rhiftImpl<uchar>, rhiftImpl<schar>, rhiftImpl<ushort>, rhiftImpl<short>, rhiftImpl<int>
+        rhiftImpl<uchar>, rhiftImpl<schar>, rhiftImpl<ushort>, rhiftImpl<short>, rhiftImpl<int>
-        };
+    };
-        funcs[src.depth()](src, val, dst);
+    funcs[src.depth()](src, val, dst);
    }
 }
-PARAM_TEST_CASE(RShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, int, UseRoi)
+PARAM_TEST_CASE(RShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -1229,44 +1225,41 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, RShift, testing::Combine(
                    MatDepth(CV_16U),
                    MatDepth(CV_16S),
                    MatDepth(CV_32S)),
-    testing::Values(1, 3, 4),
+    IMAGE_CHANNELS,
    WHOLE_SUBMAT));
 //////////////////////////////////////////////////////////////////////////////
 // LShift
-namespace
+template <typename T> void lhiftImpl(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
 {
-    template <typename T> void lhiftImpl(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
+    const int cn = src.channels();
    {
        const int cn = src.channels();
-        dst.create(src.size(), src.type());
+    dst.create(src.size(), src.type());
-        for (int y = 0; y < src.rows; ++y)
+    for (int y = 0; y < src.rows; ++y)
    {
        for (int x = 0; x < src.cols; ++x)
        {
-            for (int x = 0; x < src.cols; ++x)
+            for (int c = 0; c < cn; ++c)
-            {
+                dst.at<T>(y, x * cn + c) = src.at<T>(y, x * cn + c) << val.val[c];
                for (int c = 0; c < cn; ++c)
                    dst.at<T>(y, x * cn + c) = src.at<T>(y, x * cn + c) << val.val[c];
            }
        }
    }
 }
-    void lhiftGold(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
+void lhiftGold(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst)
-    {
+{
-        typedef void (*func_t)(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst);
+    typedef void (*func_t)(const cv::Mat& src, cv::Scalar_<int> val, cv::Mat& dst);
-        const func_t funcs[] =
+    const func_t funcs[] =
-        {
+    {
-            lhiftImpl<uchar>, lhiftImpl<schar>, lhiftImpl<ushort>, lhiftImpl<short>, lhiftImpl<int>
+        lhiftImpl<uchar>, lhiftImpl<schar>, lhiftImpl<ushort>, lhiftImpl<short>, lhiftImpl<int>
-        };
+    };
-        funcs[src.depth()](src, val, dst);
+    funcs[src.depth()](src, val, dst);
    }
 }
-PARAM_TEST_CASE(LShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, int, UseRoi)
+PARAM_TEST_CASE(LShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -1305,7 +1298,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, LShift, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32S)),
-    testing::Values(1, 3, 4),
+    IMAGE_CHANNELS,
    WHOLE_SUBMAT));
 //////////////////////////////////////////////////////////////////////////////
@ -1411,7 +1404,7 @@ PARAM_TEST_CASE(Pow, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
 TEST_P(Pow, Accuracy)
 {
-    cv::Mat src = randomMat(size, depth, 0.0, 100.0);
+    cv::Mat src = randomMat(size, depth, 0.0, 10.0);
    double power = randomDouble(2.0, 4.0);
    if (src.depth() < CV_32F)
@ -1423,7 +1416,7 @@ TEST_P(Pow, Accuracy)
    cv::Mat dst_gold;
    cv::pow(src, power, dst_gold);
-    EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 0.0 : 1e-6);
+    EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 0.0 : 1e-1);
 }
 INSTANTIATE_TEST_CASE_P(GPU_Core, Pow, testing::Combine(
@ -1486,6 +1479,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, AddWeighted, testing::Combine(
 //////////////////////////////////////////////////////////////////////////////
 // GEMM
 CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
 #define ALL_GEMM_FLAGS testing::Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
 PARAM_TEST_CASE(GEMM, cv::gpu::DeviceInfo, cv::Size, MatType, GemmFlags, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
@ -1579,6 +1575,10 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Transpose, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Flip
 enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
 CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
 #define ALL_FLIP_CODES testing::Values(FlipCode(FLIP_BOTH), FlipCode(FLIP_X), FlipCode(FLIP_Y))
 PARAM_TEST_CASE(Flip, cv::gpu::DeviceInfo, cv::Size, MatType, FlipCode, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
@ -1772,7 +1772,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Magnitude, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Phase
-PARAM_TEST_CASE(Phase, cv::gpu::DeviceInfo, cv::Size, bool, UseRoi)
+IMPLEMENT_PARAM_CLASS(AngleInDegrees, bool)
 PARAM_TEST_CASE(Phase, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -1807,13 +1809,13 @@ TEST_P(Phase, Accuracy)
 INSTANTIATE_TEST_CASE_P(GPU_Core, Phase, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
-    testing::Bool(),
+    testing::Values(AngleInDegrees(false), AngleInDegrees(true)),
    WHOLE_SUBMAT));
 ////////////////////////////////////////////////////////////////////////////////
 // CartToPolar
-PARAM_TEST_CASE(CartToPolar, cv::gpu::DeviceInfo, cv::Size, bool, UseRoi)
+PARAM_TEST_CASE(CartToPolar, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -1851,13 +1853,13 @@ TEST_P(CartToPolar, Accuracy)
 INSTANTIATE_TEST_CASE_P(GPU_Core, CartToPolar, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
-    testing::Bool(),
+    testing::Values(AngleInDegrees(false), AngleInDegrees(true)),
    WHOLE_SUBMAT));
 ////////////////////////////////////////////////////////////////////////////////
 // polarToCart
-PARAM_TEST_CASE(PolarToCart, cv::gpu::DeviceInfo, cv::Size, bool, UseRoi)
+PARAM_TEST_CASE(PolarToCart, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -1895,7 +1897,7 @@ TEST_P(PolarToCart, Accuracy)
 INSTANTIATE_TEST_CASE_P(GPU_Core, PolarToCart, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
-    testing::Bool(),
+    testing::Values(AngleInDegrees(false), AngleInDegrees(true)),
    WHOLE_SUBMAT));
 ////////////////////////////////////////////////////////////////////////////////
@ -2026,84 +2028,81 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, NormDiff, testing::Combine(
 //////////////////////////////////////////////////////////////////////////////
 // Sum
-namespace
+template <typename T>
 cv::Scalar absSumImpl(const cv::Mat& src)
 {
-    template <typename T>
+    const int cn = src.channels();
    cv::Scalar absSumImpl(const cv::Mat& src)
    {
        const int cn = src.channels();
-        cv::Scalar sum = cv::Scalar::all(0);
+    cv::Scalar sum = cv::Scalar::all(0);
-        for (int y = 0; y < src.rows; ++y)
+    for (int y = 0; y < src.rows; ++y)
    {
        for (int x = 0; x < src.cols; ++x)
        {
-            for (int x = 0; x < src.cols; ++x)
+            for (int c = 0; c < cn; ++c)
-            {
+                sum[c] += std::abs(src.at<T>(y, x * cn + c));
                for (int c = 0; c < cn; ++c)
                    sum[c] += std::abs(src.at<T>(y, x * cn + c));
            }
        }
        return sum;
    }
-    cv::Scalar absSumGold(const cv::Mat& src)
+    return sum;
-    {
+}
        typedef cv::Scalar (*func_t)(const cv::Mat& src);
-        static const func_t funcs[] =
+cv::Scalar absSumGold(const cv::Mat& src)
-        {
+{
-            absSumImpl<uchar>,
+    typedef cv::Scalar (*func_t)(const cv::Mat& src);
            absSumImpl<schar>,
            absSumImpl<ushort>,
            absSumImpl<short>,
            absSumImpl<int>,
            absSumImpl<float>,
            absSumImpl<double>
        };
        return funcs[src.depth()](src);
    }
-    template <typename T>
+    static const func_t funcs[] =
    cv::Scalar sqrSumImpl(const cv::Mat& src)
    {
-        const int cn = src.channels();
+        absSumImpl<uchar>,
        absSumImpl<schar>,
        absSumImpl<ushort>,
        absSumImpl<short>,
        absSumImpl<int>,
        absSumImpl<float>,
        absSumImpl<double>
    };
    return funcs[src.depth()](src);
 }
 template <typename T>
 cv::Scalar sqrSumImpl(const cv::Mat& src)
 {
    const int cn = src.channels();
-        cv::Scalar sum = cv::Scalar::all(0);
+    cv::Scalar sum = cv::Scalar::all(0);
-        for (int y = 0; y < src.rows; ++y)
+    for (int y = 0; y < src.rows; ++y)
    {
        for (int x = 0; x < src.cols; ++x)
        {
-            for (int x = 0; x < src.cols; ++x)
+            for (int c = 0; c < cn; ++c)
            {
-                for (int c = 0; c < cn; ++c)
+                const T val = src.at<T>(y, x * cn + c);
-                {
+                sum[c] += val * val;
                    const T val = src.at<T>(y, x * cn + c);
                    sum[c] += val * val;
                }
            }
        }
        return sum;
    }
-    cv::Scalar sqrSumGold(const cv::Mat& src)
+    return sum;
 }
 cv::Scalar sqrSumGold(const cv::Mat& src)
 {
    typedef cv::Scalar (*func_t)(const cv::Mat& src);
    static const func_t funcs[] =
    {
-        typedef cv::Scalar (*func_t)(const cv::Mat& src);
+        sqrSumImpl<uchar>,
        sqrSumImpl<schar>,
        sqrSumImpl<ushort>,
        sqrSumImpl<short>,
        sqrSumImpl<int>,
        sqrSumImpl<float>,
        sqrSumImpl<double>
    };
-        static const func_t funcs[] =
+    return funcs[src.depth()](src);
        {
            sqrSumImpl<uchar>,
            sqrSumImpl<schar>,
            sqrSumImpl<ushort>,
            sqrSumImpl<short>,
            sqrSumImpl<int>,
            sqrSumImpl<float>,
            sqrSumImpl<double>
        };
        return funcs[src.depth()](src);
    }
 }
 PARAM_TEST_CASE(Sum, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
@ -2164,57 +2163,6 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Sum, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // MinMax
 namespace
 {
    // Reference ("gold") min/max search used to check the GPU results.
    //
    // Any depth other than CV_8S is forwarded unchanged to cv::minMaxLoc.
    // For CV_8S the matrix is scanned by hand, reading one schar per column of
    // each row (NOTE(review): this looks like it expects single-channel input - confirm).
    // In the CV_8S path:
    //   - a pixel is considered only when the mask is empty or its mask byte is non-zero;
    //   - comparisons are strict, so the first extreme found in row-major order is reported;
    //   - null output pointers are skipped;
    //   - if no pixel is considered, the values stay at +/-numeric_limits<double>::max()
    //     and both locations stay at (-1, -1).
    void minMaxLocGold(const cv::Mat& src, double* minVal_, double* maxVal_ = 0, cv::Point* minLoc_ = 0, cv::Point* maxLoc_ = 0, const cv::Mat& mask = cv::Mat())
    {
        // OpenCV's minMaxLoc doesn't support CV_8S type, so only that depth is handled manually
        if (src.depth() == CV_8S)
        {
            double lowest = std::numeric_limits<double>::max();
            double highest = -std::numeric_limits<double>::max();
            cv::Point lowestAt(-1, -1);
            cv::Point highestAt(-1, -1);
            for (int row = 0; row < src.rows; ++row)
            {
                const schar* pixels = src.ptr<signed char>(row);
                // Null row pointer means "no mask": every pixel of the row is considered.
                const uchar* enabled = mask.empty() ? 0 : mask.ptr<unsigned char>(row);
                for (int col = 0; col < src.cols; ++col)
                {
                    if (enabled && !enabled[col])
                        continue;
                    const schar cur = pixels[col];
                    if (cur < lowest)
                    {
                        lowest = cur;
                        lowestAt = cv::Point(col, row);
                    }
                    if (cur > highest)
                    {
                        highest = cur;
                        highestAt = cv::Point(col, row);
                    }
                }
            }
            if (minVal_) *minVal_ = lowest;
            if (maxVal_) *maxVal_ = highest;
            if (minLoc_) *minLoc_ = lowestAt;
            if (maxLoc_) *maxLoc_ = highestAt;
            return;
        }
        cv::minMaxLoc(src, minVal_, maxVal_, minLoc_, maxLoc_, mask);
    }
 }
 PARAM_TEST_CASE(MinMax, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
@ -2278,31 +2226,28 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, MinMax, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // MinMaxLoc
-namespace
+template <typename T>
 void expectEqualImpl(const cv::Mat& src, cv::Point loc_gold, cv::Point loc)
 {
-    template <typename T>
+    EXPECT_EQ(src.at<T>(loc_gold.y, loc_gold.x), src.at<T>(loc.y, loc.x));
-    void expectEqualImpl(const cv::Mat& src, cv::Point loc_gold, cv::Point loc)
+}
-    {
+
-        EXPECT_EQ(src.at<T>(loc_gold.y, loc_gold.x), src.at<T>(loc.y, loc.x));
+void expectEqual(const cv::Mat& src, cv::Point loc_gold, cv::Point loc)
-    }
+{
    typedef void (*func_t)(const cv::Mat& src, cv::Point loc_gold, cv::Point loc);
-    void expectEqual(const cv::Mat& src, cv::Point loc_gold, cv::Point loc)
+    static const func_t funcs[] =
    {
-        typedef void (*func_t)(const cv::Mat& src, cv::Point loc_gold, cv::Point loc);
+        expectEqualImpl<uchar>,
        expectEqualImpl<schar>,
        expectEqualImpl<ushort>,
        expectEqualImpl<short>,
        expectEqualImpl<int>,
        expectEqualImpl<float>,
        expectEqualImpl<double>
    };
-        static const func_t funcs[] =
+    funcs[src.depth()](src, loc_gold, loc);
        {
            expectEqualImpl<uchar>,
            expectEqualImpl<schar>,
            expectEqualImpl<ushort>,
            expectEqualImpl<short>,
            expectEqualImpl<int>,
            expectEqualImpl<float>,
            expectEqualImpl<double>
        };
        funcs[src.depth()](src, loc_gold, loc);
    }
 }
 PARAM_TEST_CASE(MinMaxLoc, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
@ -2420,7 +2365,10 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, CountNonZero, testing::Combine(
 //////////////////////////////////////////////////////////////////////////////
 // Reduce
-PARAM_TEST_CASE(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, int, ReduceCode, UseRoi)
+CV_ENUM(ReduceCode, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
 #define ALL_REDUCE_CODES testing::Values(ReduceCode(CV_REDUCE_SUM), ReduceCode(CV_REDUCE_AVG), ReduceCode(CV_REDUCE_MAX), ReduceCode(CV_REDUCE_MIN))
 PARAM_TEST_CASE(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, ReduceCode, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -2448,6 +2396,7 @@ PARAM_TEST_CASE(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, int, ReduceCode
        dst_depth = (reduceOp == CV_REDUCE_MAX || reduceOp == CV_REDUCE_MIN) ? depth : CV_32F;
        dst_type = CV_MAKE_TYPE(dst_depth, channels);
    }
 };
 TEST_P(Reduce, Rows)
@ -2486,6 +2435,8 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Reduce, testing::Combine(
                    MatDepth(CV_16U),
                    MatDepth(CV_16S),
                    MatDepth(CV_32F)),
-    testing::Values(1, 2, 3, 4),
+    ALL_CHANNELS,
    ALL_REDUCE_CODES,
    WHOLE_SUBMAT));
 } // namespace
--- a/modules/gpu/test/test_features2d.cpp
+++ b/modules/gpu/test/test_features2d.cpp
@ -41,12 +41,74 @@
 #include "precomp.hpp"
-#ifdef HAVE_CUDA
+namespace {
-using namespace cvtest;
+// Returns true when two keypoints agree within fixed tolerances:
+//   - Euclidean distance between the points   < 1.0
+//   - absolute size difference                < 1.0
+//   - absolute angle difference               < 2.0
+//   - absolute response difference            < 0.1
+// and when octave and class_id are exactly equal.
+bool keyPointsEquals(const cv::KeyPoint& p1, const cv::KeyPoint& p2)
-using namespace testing;
+{
    const double maxPtDif = 1.0;
    const double maxSizeDif = 1.0;
    const double maxAngleDif = 2.0;
    const double maxResponseDif = 0.1;
    double dist = cv::norm(p1.pt - p2.pt);
    // fabs is used for every floating-point difference. An unqualified abs() may
    // resolve to the integer overload, which truncates the difference before the
    // comparison (e.g. any response difference below 1 would become 0 and pass).
    if (dist < maxPtDif &&
        fabs(p1.size - p2.size) < maxSizeDif &&
        fabs(p1.angle - p2.angle) < maxAngleDif &&
        fabs(p1.response - p2.response) < maxResponseDif &&
        p1.octave == p2.octave &&
        p1.class_id == p2.class_id)
    {
        return true;
    }
    return false;
 }
 // Strict ordering for keypoints: compares pt.y first and falls back to pt.x
 // only when both y coordinates compare equal.
 struct KeyPointLess : std::binary_function<cv::KeyPoint, cv::KeyPoint, bool>
 {
    bool operator()(const cv::KeyPoint& kp1, const cv::KeyPoint& kp2) const
    {
        if (kp1.pt.y < kp2.pt.y)
            return true;
        if (kp1.pt.y == kp2.pt.y)
            return kp1.pt.x < kp2.pt.x;
        return false;
    }
 };
-int getValidMatchesCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
+// Predicate-formatter that compares two keypoint sets element by element.
+//
+// gold_expr / actual_expr are the source texts of the two arguments; they are
+// only used to build the failure message.
+// Both vectors are sorted IN PLACE with KeyPointLess before comparison, so the
+// caller's containers are reordered by this call.
+// Fails when the sizes differ, or at the first index whose keypoints are not
+// accepted by keyPointsEquals; otherwise succeeds.
+testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char* actual_expr, std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
 {
    if (gold.size() != actual.size())
    {
        return testing::AssertionFailure() << "KeyPoints size mistmach\n"
                                           << "\"" << gold_expr << "\" : " << gold.size() << "\n"
                                           << "\"" << actual_expr << "\" : " << actual.size();
    }
    std::sort(actual.begin(), actual.end(), KeyPointLess());
    std::sort(gold.begin(), gold.end(), KeyPointLess());
    // The index must start at 0: leaving it uninitialized is undefined behaviour
    // and could skip the comparison entirely or index out of range.
    for (size_t i = 0; i < gold.size(); ++i)
    {
        const cv::KeyPoint& p1 = gold[i];
        const cv::KeyPoint& p2 = actual[i];
        if (!keyPointsEquals(p1, p2))
        {
            return testing::AssertionFailure() << "KeyPoints differ at " << i << "\n"
                                               << "\"" << gold_expr << "\" vs \"" << actual_expr << "\" : \n"
                                               << "pt : " << testing::PrintToString(p1.pt) << " vs " << testing::PrintToString(p2.pt) << "\n"
                                               << "size : " << p1.size << " vs " << p2.size << "\n"
                                               << "angle : " << p1.angle << " vs " << p2.angle << "\n"
                                               << "response : " << p1.response << " vs " << p2.response << "\n"
                                               << "octave : " << p1.octave << " vs " << p2.octave << "\n"
                                               << "class_id : " << p1.class_id << " vs " << p2.class_id;
        }
    }
    return ::testing::AssertionSuccess();
 }
 // Compares two keypoint vectors through assertKeyPointsEquals.
 // NOTE(review): despite the ASSERT_ prefix this expands to EXPECT_PRED_FORMAT2,
 // i.e. a gtest EXPECT_-style check rather than an ASSERT_-style one. The expansion
 // also already ends with a semicolon, so `ASSERT_KEYPOINTS_EQ(a, b);` yields an
 // extra empty statement.
 #define ASSERT_KEYPOINTS_EQ(gold, actual) EXPECT_PRED_FORMAT2(assertKeyPointsEquals, gold, actual);
 int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
 {
    int validCount = 0;
@ -57,22 +119,8 @@ int getValidMatchesCount(const std::vector<cv::KeyPoint>& keypoints1, const std:
        const cv::KeyPoint& p1 = keypoints1[m.queryIdx];
        const cv::KeyPoint& p2 = keypoints2[m.trainIdx];
-        const float maxPtDif = 1.f;
+        if (keyPointsEquals(p1, p2))
        const float maxSizeDif = 1.f;
        const float maxAngleDif = 2.f;
        const float maxResponseDif = 0.1f;
        float dist = (float) cv::norm(p1.pt - p2.pt);
        if (dist < maxPtDif &&
            fabs(p1.size - p2.size) < maxSizeDif &&
            abs(p1.angle - p2.angle) < maxAngleDif &&
            abs(p1.response - p2.response) < maxResponseDif &&
            p1.octave == p2.octave &&
            p1.class_id == p2.class_id)
        {
            ++validCount;
        }
    }
    return validCount;
@ -81,78 +129,280 @@ int getValidMatchesCount(const std::vector<cv::KeyPoint>& keypoints1, const std:
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // SURF
-struct SURF : TestWithParam<cv::gpu::DeviceInfo>
+IMPLEMENT_PARAM_CLASS(SURF_HessianThreshold, double)
 IMPLEMENT_PARAM_CLASS(SURF_Octaves, int)
 IMPLEMENT_PARAM_CLASS(SURF_OctaveLayers, int)
 IMPLEMENT_PARAM_CLASS(SURF_Extended, bool)
 IMPLEMENT_PARAM_CLASS(SURF_Upright, bool)
 PARAM_TEST_CASE(SURF, cv::gpu::DeviceInfo, SURF_HessianThreshold, SURF_Octaves, SURF_OctaveLayers, SURF_Extended, SURF_Upright)
 {
    cv::gpu::DeviceInfo devInfo;
-
+    double hessianThreshold;
-    cv::Mat image;
+    int nOctaves;
-    cv::Mat mask;
+    int nOctaveLayers;
-
+    bool extended;
-    std::vector<cv::KeyPoint> keypoints_gold;
+    bool upright;
    std::vector<float> descriptors_gold;
    virtual void SetUp()
    {
-        devInfo = GetParam();
+        devInfo = GET_PARAM(0);
        hessianThreshold = GET_PARAM(1);
        nOctaves = GET_PARAM(2);
        nOctaveLayers = GET_PARAM(3);
        extended = GET_PARAM(4);
        upright = GET_PARAM(5);
        cv::gpu::setDevice(devInfo.deviceID());
        image = readImage("features2d/aloe.png", CV_LOAD_IMAGE_GRAYSCALE);
        ASSERT_FALSE(image.empty());
        mask = cv::Mat(image.size(), CV_8UC1, cv::Scalar::all(1));
        mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
        cv::SURF fdetector_gold;
        fdetector_gold.extended = false;
        fdetector_gold(image, mask, keypoints_gold, descriptors_gold);
    }
 };
-TEST_P(SURF, EmptyDataTest)
+TEST_P(SURF, Detector)
 {
-    cv::gpu::SURF_GPU fdetector;
+    cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());
    cv::gpu::SURF_GPU surf;
    surf.hessianThreshold = hessianThreshold;
    surf.nOctaves = nOctaves;
    surf.nOctaveLayers = nOctaveLayers;
    surf.extended = extended;
    surf.upright = upright;
    surf.keypointsRatio = 0.05f;
    cv::gpu::GpuMat image;
    std::vector<cv::KeyPoint> keypoints;
-    std::vector<float> descriptors;
+    surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
    cv::SURF surf_gold;
    surf_gold.hessianThreshold = hessianThreshold;
    surf_gold.nOctaves = nOctaves;
    surf_gold.nOctaveLayers = nOctaveLayers;
    surf_gold.extended = extended;
    surf_gold.upright = upright;
-    fdetector(image, cv::gpu::GpuMat(), keypoints, descriptors);
+    std::vector<cv::KeyPoint> keypoints_gold;
    surf_gold(image, cv::noArray(), keypoints_gold);
-    EXPECT_TRUE(keypoints.empty());
+    ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
    EXPECT_TRUE(descriptors.empty());
 }
-TEST_P(SURF, Accuracy)
+TEST_P(SURF, Detector_Masked)
 {
    cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());
    cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
    mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
    cv::gpu::SURF_GPU surf;
    surf.hessianThreshold = hessianThreshold;
    surf.nOctaves = nOctaves;
    surf.nOctaveLayers = nOctaveLayers;
    surf.extended = extended;
    surf.upright = upright;
    surf.keypointsRatio = 0.05f;
    std::vector<cv::KeyPoint> keypoints;
-    cv::Mat descriptors;
+    surf(loadMat(image), loadMat(mask), keypoints);
-    cv::gpu::GpuMat dev_descriptors;
+    cv::SURF surf_gold;
-    cv::gpu::SURF_GPU fdetector; fdetector.extended = false;
+    surf_gold.hessianThreshold = hessianThreshold;
    surf_gold.nOctaves = nOctaves;
    surf_gold.nOctaveLayers = nOctaveLayers;
    surf_gold.extended = extended;
    surf_gold.upright = upright;
-    fdetector(loadMat(image), loadMat(mask), keypoints, dev_descriptors);
+    std::vector<cv::KeyPoint> keypoints_gold;
    surf_gold(image, mask, keypoints_gold);
    ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
 }
-    dev_descriptors.download(descriptors);
+TEST_P(SURF, Descriptor)
 {
    cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());
    cv::gpu::SURF_GPU surf;
    surf.hessianThreshold = hessianThreshold;
    surf.nOctaves = nOctaves;
    surf.nOctaveLayers = nOctaveLayers;
    surf.extended = extended;
    surf.upright = upright;
    surf.keypointsRatio = 0.05f;
    cv::SURF surf_gold;
    surf_gold.hessianThreshold = hessianThreshold;
    surf_gold.nOctaves = nOctaves;
    surf_gold.nOctaveLayers = nOctaveLayers;
    surf_gold.extended = extended;
    surf_gold.upright = upright;
    std::vector<cv::KeyPoint> keypoints;
    surf_gold(image, cv::noArray(), keypoints);
    cv::gpu::GpuMat descriptors;
    surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true);
    cv::Mat descriptors_gold;
    surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
    cv::BFMatcher matcher(cv::NORM_L2);
    std::vector<cv::DMatch> matches;
    matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
-    matcher.match(cv::Mat(static_cast<int>(keypoints_gold.size()), 64, CV_32FC1, &descriptors_gold[0]), descriptors, matches);
+    int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
    double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
-    int validCount = getValidMatchesCount(keypoints_gold, keypoints, matches);
+    EXPECT_GT(matchedRatio, 0.35);
 }
-    double validRatio = (double) validCount / matches.size();
+INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine(
    ALL_DEVICES,
    testing::Values(SURF_HessianThreshold(100.0), SURF_HessianThreshold(500.0), SURF_HessianThreshold(1000.0)),
    testing::Values(SURF_Octaves(3), SURF_Octaves(4)),
    testing::Values(SURF_OctaveLayers(2), SURF_OctaveLayers(3)),
    testing::Values(SURF_Extended(false), SURF_Extended(true)),
    testing::Values(SURF_Upright(false), SURF_Upright(true))));
-    EXPECT_GT(validRatio, 0.5);
+/////////////////////////////////////////////////////////////////////////////////////////////////
 // FAST
 IMPLEMENT_PARAM_CLASS(FAST_Threshold, int)
 IMPLEMENT_PARAM_CLASS(FAST_NonmaxSupression, bool)
 // Parameterized fixture for the FAST tests.
 // Parameters, in order: target device, detector threshold, non-max suppression flag.
 PARAM_TEST_CASE(FAST, cv::gpu::DeviceInfo, FAST_Threshold, FAST_NonmaxSupression)
 {
    cv::gpu::DeviceInfo devInfo;
    int threshold;
    bool nonmaxSupression;
    // Unpacks the test parameters into the members above and selects the device.
    virtual void SetUp()
    {
        // Device first: pick it up and make it current.
        devInfo = GET_PARAM(0);
        cv::gpu::setDevice(devInfo.deviceID());
        // Detector settings.
        threshold = GET_PARAM(1);
        nonmaxSupression = GET_PARAM(2);
    }
 };
 // Runs FAST on features2d/aloe.png with cv::FAST and with cv::gpu::FAST_GPU,
 // both using the fixture's threshold and non-max suppression flag, and
 // compares the two keypoint sets with ASSERT_KEYPOINTS_EQ.
 TEST_P(FAST, Accuracy)
 {
    cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());
    // Reference keypoints from the CPU detector.
    std::vector<cv::KeyPoint> keypoints_gold;
    cv::FAST(image, keypoints_gold, threshold, nonmaxSupression);
    // Keypoints from the GPU detector; an empty GpuMat is passed as the mask.
    cv::gpu::FAST_GPU fast(threshold);
    fast.nonmaxSupression = nonmaxSupression;
    std::vector<cv::KeyPoint> keypoints;
    fast(loadMat(image), cv::gpu::GpuMat(), keypoints);
    ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
 }
-INSTANTIATE_TEST_CASE_P(Features2D, SURF, DEVICES(cv::gpu::GLOBAL_ATOMICS));
+INSTANTIATE_TEST_CASE_P(GPU_Features2D, FAST, testing::Combine(
    ALL_DEVICES,
    testing::Values(FAST_Threshold(25), FAST_Threshold(50)),
    testing::Values(FAST_NonmaxSupression(false), FAST_NonmaxSupression(true))));
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // ORB
 // Named parameter types for the ORB test, one per ORB_GPU constructor
 // argument plus the blurForDescriptor switch. IMPLEMENT_PARAM_CLASS and
 // CV_ENUM are defined elsewhere; presumably they exist so parameters print
 // readably in test names -- TODO confirm against the macro definitions.
 IMPLEMENT_PARAM_CLASS(ORB_FeaturesCount, int)
 IMPLEMENT_PARAM_CLASS(ORB_ScaleFactor, float)
 IMPLEMENT_PARAM_CLASS(ORB_LevelsCount, int)
 IMPLEMENT_PARAM_CLASS(ORB_EdgeThreshold, int)
 IMPLEMENT_PARAM_CLASS(ORB_firstLevel, int)
 IMPLEMENT_PARAM_CLASS(ORB_WTA_K, int)
 IMPLEMENT_PARAM_CLASS(ORB_PatchSize, int)
 IMPLEMENT_PARAM_CLASS(ORB_BlurForDescriptor, bool)
 CV_ENUM(ORB_ScoreType, cv::ORB::HARRIS_SCORE, cv::ORB::FAST_SCORE)
 // Fixture for the ORB test. The parameter order here fixes the GET_PARAM
 // indices used in SetUp below.
 PARAM_TEST_CASE(ORB, cv::gpu::DeviceInfo, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold, ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor)
 {
    cv::gpu::DeviceInfo devInfo;
    int nFeatures;
    float scaleFactor;
    int nLevels;
    int edgeThreshold;
    int firstLevel;
    int WTA_K;
    int scoreType;
    int patchSize;
    bool blurForDescriptor;
    // Unpacks the parameter tuple into the members above and makes the
    // requested device current before the test body runs.
    virtual void SetUp()
    {
        devInfo = GET_PARAM(0);
        nFeatures = GET_PARAM(1);
        scaleFactor = GET_PARAM(2);
        nLevels = GET_PARAM(3);
        edgeThreshold = GET_PARAM(4);
        firstLevel = GET_PARAM(5);
        WTA_K = GET_PARAM(6);
        scoreType = GET_PARAM(7);
        patchSize = GET_PARAM(8);
        blurForDescriptor = GET_PARAM(9);
        cv::gpu::setDevice(devInfo.deviceID());
    }
 };
 // Runs ORB on the GPU and on the CPU (reference) with identical parameters
 // and the same mask, matches the two descriptor sets with a Hamming
 // brute-force matcher, and requires that more than 35% of the GPU keypoints
 // are counted as matched by getMatchedPointsCount (defined elsewhere).
 TEST_P(ORB, Accuracy)
 {
    cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(image.empty());

    // Mask that is 1 everywhere except the top-left quadrant, which is zeroed.
    cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
    mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));

    cv::gpu::ORB_GPU orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
    orb.blurForDescriptor = blurForDescriptor;

    std::vector<cv::KeyPoint> keypoints;
    cv::gpu::GpuMat descriptors;
    orb(loadMat(image), loadMat(mask), keypoints, descriptors);

    // The ratio below divides by keypoints.size(); fail here with a clear
    // message instead of comparing a NaN ratio when the GPU found nothing.
    ASSERT_FALSE(keypoints.empty());

    cv::ORB orb_gold(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);

    std::vector<cv::KeyPoint> keypoints_gold;
    cv::Mat descriptors_gold;
    orb_gold(image, mask, keypoints_gold, descriptors_gold);

    // Query = CPU descriptors, train = GPU descriptors (downloaded to host).
    cv::BFMatcher matcher(cv::NORM_HAMMING);
    std::vector<cv::DMatch> matches;
    matcher.match(descriptors_gold, cv::Mat(descriptors), matches);

    int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints, matches);
    double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();

    EXPECT_GT(matchedRatio, 0.35);
 }
 // Parameter grid for the ORB test: for every device, 2 level counts x
 // 2 first levels x 3 WTA_K values x 2 patch sizes x 2 blur settings
 // (48 combinations); features count, scale factor, edge threshold and score
 // type are each pinned to a single value (only HARRIS_SCORE is exercised).
 INSTANTIATE_TEST_CASE_P(GPU_Features2D, ORB,  testing::Combine(
    ALL_DEVICES,
    testing::Values(ORB_FeaturesCount(1000)),
    testing::Values(ORB_ScaleFactor(1.2f)),
    testing::Values(ORB_LevelsCount(4), ORB_LevelsCount(8)),
    testing::Values(ORB_EdgeThreshold(31)),
    testing::Values(ORB_firstLevel(0), ORB_firstLevel(2)),
    testing::Values(ORB_WTA_K(2), ORB_WTA_K(3), ORB_WTA_K(4)),
    testing::Values(ORB_ScoreType(cv::ORB::HARRIS_SCORE)),
    testing::Values(ORB_PatchSize(31), ORB_PatchSize(29)),
    testing::Values(ORB_BlurForDescriptor(false), ORB_BlurForDescriptor(true))));
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // BruteForceMatcher
-PARAM_TEST_CASE(BruteForceMatcher, cv::gpu::DeviceInfo, DistType, int)
+CV_ENUM(DistType, cv::gpu::BruteForceMatcher_GPU_base::L1Dist, cv::gpu::BruteForceMatcher_GPU_base::L2Dist, cv::gpu::BruteForceMatcher_GPU_base::HammingDist)
 IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
 PARAM_TEST_CASE(BruteForceMatcher, cv::gpu::DeviceInfo, DistType, DescriptorSize)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::gpu::BruteForceMatcher_GPU_base::DistType distType;
@ -212,10 +462,9 @@ PARAM_TEST_CASE(BruteForceMatcher, cv::gpu::DeviceInfo, DistType, int)
 TEST_P(BruteForceMatcher, Match)
 {
    std::vector<cv::DMatch> matches;
    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
    std::vector<cv::DMatch> matches;
    matcher.match(loadMat(query), loadMat(train), matches);
    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
@ -234,17 +483,13 @@ TEST_P(BruteForceMatcher, Match)
 TEST_P(BruteForceMatcher, MatchAdd)
 {
    std::vector<cv::DMatch> matches;
    bool isMaskSupported;
    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
    cv::gpu::GpuMat d_train(train);
    // make add() twice to test such case
-    matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(0, train.rows/2)));
+    matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(train.rows/2, train.rows)));
+    matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
    // prepare masks (make first nearest match illegal)
    std::vector<cv::gpu::GpuMat> masks(2);
@ -255,28 +500,26 @@ TEST_P(BruteForceMatcher, MatchAdd)
            masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
    }
    std::vector<cv::DMatch> matches;
    matcher.match(cv::gpu::GpuMat(query), matches, masks);
    isMaskSupported = matcher.isMaskSupported();
    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
    int badCount = 0;
    int shift = matcher.isMaskSupported() ? 1 : 0;
    for (size_t i = 0; i < matches.size(); i++)
    {
        cv::DMatch match = matches[i];
-        int shift = isMaskSupported ? 1 : 0;
+
        if ((int)i < queryDescCount / 2)
        {
-            if ((int)i < queryDescCount / 2)
+            if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + shift) || (match.imgIdx != 0))
-            {
+                badCount++;
-                if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + shift) || (match.imgIdx != 0))
+        }
-                    badCount++;
+        else
-            }
+        {
-            else
+            if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + shift) || (match.imgIdx != 1))
-            {
+                badCount++;
                if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + shift) || (match.imgIdx != 1))
                    badCount++;
            }
        }
    }
@ -287,9 +530,9 @@ TEST_P(BruteForceMatcher, KnnMatch2)
 {
    const int knn = 2;
    std::vector< std::vector<cv::DMatch> > matches;
    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
    std::vector< std::vector<cv::DMatch> > matches;
    matcher.knnMatch(loadMat(query), loadMat(train), matches, knn);
    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
@ -317,11 +560,11 @@ TEST_P(BruteForceMatcher, KnnMatch2)
 TEST_P(BruteForceMatcher, KnnMatch3)
 {
    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
    const int knn = 3;
    std::vector< std::vector<cv::DMatch> > matches;
    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
    matcher.knnMatch(loadMat(query), loadMat(train), matches, knn);
    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
@ -350,9 +593,6 @@ TEST_P(BruteForceMatcher, KnnMatch3)
 TEST_P(BruteForceMatcher, KnnMatchAdd2)
 {
    const int knn = 2;
    std::vector< std::vector<cv::DMatch> > matches;
    bool isMaskSupported;
    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
@ -371,14 +611,14 @@ TEST_P(BruteForceMatcher, KnnMatchAdd2)
            masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
    }
-    matcher.knnMatch(cv::gpu::GpuMat(query), matches, knn, masks);
+    std::vector< std::vector<cv::DMatch> > matches;
-    isMaskSupported = matcher.isMaskSupported();
+    matcher.knnMatch(cv::gpu::GpuMat(query), matches, knn, masks);
    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
    int badCount = 0;
-    int shift = isMaskSupported ? 1 : 0;
+    int shift = matcher.isMaskSupported() ? 1 : 0;
    for (size_t i = 0; i < matches.size(); i++)
    {
        if ((int)matches[i].size() != knn)
@ -412,9 +652,6 @@ TEST_P(BruteForceMatcher, KnnMatchAdd2)
 TEST_P(BruteForceMatcher, KnnMatchAdd3)
 {
    const int knn = 3;
    std::vector< std::vector<cv::DMatch> > matches;
    bool isMaskSupported;
    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
@ -433,14 +670,13 @@ TEST_P(BruteForceMatcher, KnnMatchAdd3)
            masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
    }
    std::vector< std::vector<cv::DMatch> > matches;
    matcher.knnMatch(cv::gpu::GpuMat(query), matches, knn, masks);
    isMaskSupported = matcher.isMaskSupported();
    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
    int badCount = 0;
-    int shift = isMaskSupported ? 1 : 0;
+    int shift = matcher.isMaskSupported() ? 1 : 0;
    for (size_t i = 0; i < matches.size(); i++)
    {
        if ((int)matches[i].size() != knn)
@ -473,16 +709,11 @@ TEST_P(BruteForceMatcher, KnnMatchAdd3)
 TEST_P(BruteForceMatcher, RadiusMatch)
 {
    if (!supportFeature(devInfo, cv::gpu::SHARED_ATOMICS))
        return;
    const float radius = 1.f / countFactor;
    std::vector< std::vector<cv::DMatch> > matches;
    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
    std::vector< std::vector<cv::DMatch> > matches;
    matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
@ -505,16 +736,9 @@ TEST_P(BruteForceMatcher, RadiusMatch)
 TEST_P(BruteForceMatcher, RadiusMatchAdd)
 {
-    if (!supportFeature(devInfo, cv::gpu::SHARED_ATOMICS))
+    const int n = 3;
        return;
    int n = 3;
    const float radius = 1.f / countFactor * n;
    std::vector< std::vector<cv::DMatch> > matches;
    bool isMaskSupported;
    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
    cv::gpu::GpuMat d_train(train);
@ -532,15 +756,14 @@ TEST_P(BruteForceMatcher, RadiusMatchAdd)
            masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
    }
    std::vector< std::vector<cv::DMatch> > matches;
    matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks);
    isMaskSupported = matcher.isMaskSupported();
    ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
    int badCount = 0;
-    int shift = isMaskSupported ? 1 : 0;
+    int shift = matcher.isMaskSupported() ? 1 : 0;
-    int needMatchCount = isMaskSupported ? n-1 : n;
+    int needMatchCount = matcher.isMaskSupported() ? n-1 : n;
    for (size_t i = 0; i < matches.size(); i++)
    {
        if ((int)matches[i].size() != needMatchCount)
@ -571,141 +794,9 @@ TEST_P(BruteForceMatcher, RadiusMatchAdd)
    ASSERT_EQ(0, badCount);
 }
-INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher, Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
-                        ALL_DEVICES,
+    ALL_DEVICES,
-                        Values(cv::gpu::BruteForceMatcher_GPU_base::L1Dist, cv::gpu::BruteForceMatcher_GPU_base::L2Dist),
+    testing::Values(DistType(cv::gpu::BruteForceMatcher_GPU_base::L1Dist), DistType(cv::gpu::BruteForceMatcher_GPU_base::L2Dist)),
-                        Values(57, 64, 83, 128, 179, 256, 304)));
+    testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304))));
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // FAST
 // Device-parameterized fixture for the FAST test: loads the test image and
 // precomputes the CPU reference keypoints with a fixed threshold of 30.
 struct FAST : TestWithParam<cv::gpu::DeviceInfo>
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Mat image;
    int threshold;
    // CPU reference result produced by cv::FAST in SetUp.
    std::vector<cv::KeyPoint> keypoints_gold;
    virtual void SetUp()
    {
        devInfo = GetParam();
        cv::gpu::setDevice(devInfo.deviceID());
        image = readImage("features2d/aloe.png", CV_LOAD_IMAGE_GRAYSCALE);
        ASSERT_FALSE(image.empty());
        threshold = 30;
        cv::FAST(image, keypoints_gold, threshold);
    }
 };
 // Unary predicate: true for a keypoint whose hash() equals the stored hash.
 struct HashEq
 {
    size_t hash;

    inline HashEq(size_t wantedHash) : hash(wantedHash) {}

    inline bool operator ()(const cv::KeyPoint& kp) const
    {
        const size_t candidate = kp.hash();
        return hash == candidate;
    }
 };
 // Strict ordering of keypoints by position: first by pt.y, then by pt.x.
 struct KeyPointCompare
 {
    inline bool operator ()(const cv::KeyPoint& kp1, const cv::KeyPoint& kp2) const
    {
        // Different rows: the row alone decides.
        if (kp1.pt.y != kp2.pt.y)
            return kp1.pt.y < kp2.pt.y;

        // Same row: fall back to the column.
        return kp1.pt.x < kp2.pt.x;
    }
 };
 // Runs FAST on the GPU, sorts the result by position and compares it
 // element by element (via KeyPoint::hash) with the fixture's CPU reference.
 TEST_P(FAST, Accuracy)
 {
    cv::gpu::FAST_GPU fastGPU(threshold);

    std::vector<cv::KeyPoint> keypoints;
    fastGPU(cv::gpu::GpuMat(image), cv::gpu::GpuMat(), keypoints);

    ASSERT_EQ(keypoints.size(), keypoints_gold.size());

    // Only the GPU result is sorted; the reference is compared in the order
    // cv::FAST returned it.
    std::sort(keypoints.begin(), keypoints.end(), KeyPointCompare());

    const size_t total = keypoints_gold.size();
    for (size_t i = 0; i < total; ++i)
    {
        size_t h1 = keypoints[i].hash();
        size_t h2 = keypoints_gold[i].hash();

        ASSERT_EQ(h1, h2);
    }
 }
 INSTANTIATE_TEST_CASE_P(Features2D, FAST, DEVICES(cv::gpu::GLOBAL_ATOMICS));
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // ORB
 // Device-parameterized fixture for the ORB test: loads the test image,
 // builds a mask, and precomputes CPU reference keypoints and descriptors.
 struct ORB : TestWithParam<cv::gpu::DeviceInfo>
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Mat image;
    cv::Mat mask;
    // Feature count passed to the cv::ORB constructor (set to 1000 in SetUp).
    int npoints;
    // CPU reference results produced by cv::ORB in SetUp.
    std::vector<cv::KeyPoint> keypoints_gold;
    cv::Mat descriptors_gold;
    virtual void SetUp()
    {
        devInfo = GetParam();
        cv::gpu::setDevice(devInfo.deviceID());
        image = readImage("features2d/aloe.png", CV_LOAD_IMAGE_GRAYSCALE);
        ASSERT_FALSE(image.empty());
        // Mask is 1 everywhere except the top-left quadrant, which is zeroed.
        mask = cv::Mat(image.size(), CV_8UC1, cv::Scalar::all(1));
        mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
        npoints = 1000;
        cv::ORB orbCPU(npoints);
        orbCPU(image, mask, keypoints_gold, descriptors_gold);
    }
 };
 // Runs ORB on the GPU, matches its descriptors against the fixture's CPU
 // reference with a Hamming brute-force matcher, and requires that at least
 // 65% of the matches are counted as valid by getValidMatchesCount.
 TEST_P(ORB, Accuracy)
 {
    cv::gpu::ORB_GPU orbGPU(npoints);

    // GPU run: keypoints come back on the host, descriptors stay on the device.
    std::vector<cv::KeyPoint> keypoints;
    cv::gpu::GpuMat d_descriptors;
    orbGPU(cv::gpu::GpuMat(image), cv::gpu::GpuMat(mask), keypoints, d_descriptors);

    // Bring the descriptors to the host for the CPU matcher.
    cv::Mat descriptors;
    d_descriptors.download(descriptors);

    std::vector<cv::DMatch> matches;
    cv::BFMatcher matcher(cv::NORM_HAMMING);
    matcher.match(descriptors_gold, descriptors, matches);

    const int count = getValidMatchesCount(keypoints_gold, keypoints, matches);
    const double ratio = static_cast<double>(count) / matches.size();

    ASSERT_GE(ratio, 0.65);
 }
 INSTANTIATE_TEST_CASE_P(Features2D, ORB, DEVICES(cv::gpu::GLOBAL_ATOMICS));
-#endif // HAVE_CUDA
+} // namespace
--- a/modules/gpu/test/test_filters.cpp
+++ b/modules/gpu/test/test_filters.cpp
--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@ -2198,7 +2198,7 @@ INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, ALL_DEVICES);
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // cornerHarris
-PARAM_TEST_CASE(CornerHarris, cv::gpu::DeviceInfo, MatType, Border, int, int)
+PARAM_TEST_CASE(CornerHarris, cv::gpu::DeviceInfo, MatType, BorderType, int, int)
 {
    cv::gpu::DeviceInfo devInfo;
    int type;
@ -2257,7 +2257,7 @@ INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, Combine(
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // cornerMinEigen
-PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, Border, int, int)
+PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, BorderType, int, int)
 {
    cv::gpu::DeviceInfo devInfo;
    int type;
@ -2572,6 +2572,8 @@ INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftSegmentation, Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // matchTemplate
 CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
 PARAM_TEST_CASE(MatchTemplate8U, cv::gpu::DeviceInfo, int, TemplateMethod)
 {
    cv::gpu::DeviceInfo devInfo;
@ -2776,6 +2778,8 @@ INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED, Combine(
 ////////////////////////////////////////////////////////////////////////////
 // MulSpectrums
 CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
 PARAM_TEST_CASE(MulSpectrums, cv::gpu::DeviceInfo, DftFlags)
 {
    cv::gpu::DeviceInfo devInfo;
--- a/modules/gpu/test/test_remap.cpp
+++ b/modules/gpu/test/test_remap.cpp
@ -70,7 +70,7 @@ namespace
    {
        typedef void (*func_t)(const cv::Mat& src, const cv::Mat& xmap, const cv::Mat& ymap, cv::Mat& dst, int borderType, cv::Scalar borderVal);
-        static const func_t nearest_funcs[] = 
+        static const func_t nearest_funcs[] =
        {
            remapImpl<unsigned char, NearestInterpolator>,
            remapImpl<signed char, NearestInterpolator>,
@ -80,7 +80,7 @@ namespace
            remapImpl<float, NearestInterpolator>
        };
-        static const func_t linear_funcs[] = 
+        static const func_t linear_funcs[] =
        {
            remapImpl<unsigned char, LinearInterpolator>,
            remapImpl<signed char, LinearInterpolator>,
@ -90,7 +90,7 @@ namespace
            remapImpl<float, LinearInterpolator>
        };
-        static const func_t cubic_funcs[] = 
+        static const func_t cubic_funcs[] =
        {
            remapImpl<unsigned char, CubicInterpolator>,
            remapImpl<signed char, CubicInterpolator>,
@ -109,7 +109,7 @@ namespace
 ///////////////////////////////////////////////////////////////////
 // Test
-PARAM_TEST_CASE(Remap, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, Border, UseRoi)
+PARAM_TEST_CASE(Remap, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderType, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -171,7 +171,7 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Remap, testing::Combine(
    DIFFERENT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
-    testing::Values(Border(cv::BORDER_REFLECT101), Border(cv::BORDER_REPLICATE), Border(cv::BORDER_CONSTANT), Border(cv::BORDER_REFLECT), Border(cv::BORDER_WRAP)),
+    testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)),
    WHOLE_SUBMAT));
 #endif // HAVE_CUDA
--- a/modules/gpu/test/test_threshold.cpp
+++ b/modules/gpu/test/test_threshold.cpp
@ -43,6 +43,9 @@
 #ifdef HAVE_CUDA
 CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
 #define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))
 PARAM_TEST_CASE(Threshold, cv::gpu::DeviceInfo, cv::Size, MatType, ThreshOp, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
--- a/modules/gpu/test/test_warp_affine.cpp
+++ b/modules/gpu/test/test_warp_affine.cpp
@ -175,7 +175,7 @@ namespace
 ///////////////////////////////////////////////////////////////////
 // Test
-PARAM_TEST_CASE(WarpAffine, cv::gpu::DeviceInfo, cv::Size, MatType, Inverse, Interpolation, Border, UseRoi)
+PARAM_TEST_CASE(WarpAffine, cv::gpu::DeviceInfo, cv::Size, MatType, Inverse, Interpolation, BorderType, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -225,7 +225,7 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpAffine, testing::Combine(
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
    DIRECT_INVERSE,
    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
-    testing::Values(Border(cv::BORDER_REFLECT101), Border(cv::BORDER_REPLICATE), Border(cv::BORDER_REFLECT), Border(cv::BORDER_WRAP)),
+    testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)),
    WHOLE_SUBMAT));
 ///////////////////////////////////////////////////////////////////
--- a/modules/gpu/test/test_warp_perspective.cpp
+++ b/modules/gpu/test/test_warp_perspective.cpp
@ -175,7 +175,7 @@ namespace
 ///////////////////////////////////////////////////////////////////
 // Test
-PARAM_TEST_CASE(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, MatType, Inverse, Interpolation, Border, UseRoi)
+PARAM_TEST_CASE(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, MatType, Inverse, Interpolation, BorderType, UseRoi)
 {
    cv::gpu::DeviceInfo devInfo;
    cv::Size size;
@ -225,7 +225,7 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, WarpPerspective, testing::Combine(
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
    DIRECT_INVERSE,
    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
-    testing::Values(Border(cv::BORDER_REFLECT101), Border(cv::BORDER_REPLICATE), Border(cv::BORDER_REFLECT), Border(cv::BORDER_WRAP)),
+    testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)),
    WHOLE_SUBMAT));
 ///////////////////////////////////////////////////////////////////
--- a/modules/gpu/test/utility.cpp
+++ b/modules/gpu/test/utility.cpp
@ -47,6 +47,9 @@ using namespace cv::gpu;
 using namespace cvtest;
 using namespace testing;
 //////////////////////////////////////////////////////////////////////
 // random generators
 int randomInt(int minVal, int maxVal)
 {
    RNG& rng = TS::ptr()->get_rng();
@ -74,6 +77,9 @@ Mat randomMat(Size size, int type, double minVal, double maxVal)
    return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false);
 }
 //////////////////////////////////////////////////////////////////////
 // GpuMat create
 cv::gpu::GpuMat createMat(cv::Size size, int type, bool useRoi)
 {
    Size size0 = size;
@ -99,6 +105,30 @@ GpuMat loadMat(const Mat& m, bool useRoi)
    return d_m;
 }
 //////////////////////////////////////////////////////////////////////
 // Image load
 // Loads fileName, given relative to the test-data directory reported by the
 // test system, using the supplied imread flags.
 Mat readImage(const string& fileName, int flags)
 {
    const string fullPath = string(cvtest::TS::ptr()->get_data_path()) + fileName;
    return imread(fullPath, flags);
 }
 // Loads an image from the test-data directory and converts it to the channel
 // count and depth encoded in `type`.
 //   - 1 channel  : loaded as grayscale
 //   - 4 channels : loaded as colour, then expanded BGR -> BGRA
 //   - otherwise  : loaded as colour and left with the channel count imread
 //                  produced (no dedicated handling for other counts)
 // Returns an empty Mat when the image cannot be loaded, so callers can test
 // the result with empty() instead of hitting an exception from cvtColor.
 Mat readImageType(const string& fname, int type)
 {
    Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
    // Load failure: nothing to convert; hand the empty Mat back to the caller.
    if (src.empty())
        return src;
    if (CV_MAT_CN(type) == 4)
    {
        Mat temp;
        cvtColor(src, temp, cv::COLOR_BGR2BGRA);
        swap(src, temp);
    }
    src.convertTo(src, CV_MAT_DEPTH(type));
    return src;
 }
 //////////////////////////////////////////////////////////////////////
 // Gpu devices
 bool supportFeature(const DeviceInfo& info, FeatureSet feature)
 {
    return TargetArchs::builtWith(feature) && info.supports(feature);
@ -150,86 +180,146 @@ vector<DeviceInfo> devices(FeatureSet feature)
    return devs_filtered;
 }
-vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
+//////////////////////////////////////////////////////////////////////
-{
+// Additional assertion
    vector<MatType> v;
    v.reserve((depth_end - depth_start + 1) * (cn_end - cn_start + 1));
-    for (int depth = depth_start; depth <= depth_end; ++depth)
+Mat getMat(InputArray arr)
 {
    if (arr.kind() == _InputArray::GPU_MAT)
    {
-        for (int cn = cn_start; cn <= cn_end; ++cn)
+        Mat m;
-        {
+        arr.getGpuMat().download(m);
-            v.push_back(CV_MAKETYPE(depth, cn));
+        return m;
        }
    }
-    return v;
+    return arr.getMat();
 }
-const vector<MatType>& all_types()
+double checkNorm(InputArray m1, const InputArray m2)
 {
-    static vector<MatType> v = types(CV_8U, CV_64F, 1, 4);
+    return norm(getMat(m1), getMat(m2), NORM_INF);
    return v;
 }
-Mat readImage(const string& fileName, int flags)
+void minMaxLocGold(const Mat& src, double* minVal_, double* maxVal_, Point* minLoc_, Point* maxLoc_, const Mat& mask)
 {
-    return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags);
+    if (src.depth() != CV_8S)
-}
+    {
        minMaxLoc(src, minVal_, maxVal_, minLoc_, maxLoc_, mask);
        return;
    }
-Mat readImageType(const string& fname, int type)
+    // OpenCV's minMaxLoc doesn't support CV_8S type
-{
+    double minVal = numeric_limits<double>::max();
-    Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
+    Point minLoc(-1, -1);
-    if (CV_MAT_CN(type) == 4)
+
    double maxVal = -numeric_limits<double>::max();
    Point maxLoc(-1, -1);
    for (int y = 0; y < src.rows; ++y)
    {
-        Mat temp;
+        const schar* src_row = src.ptr<signed char>(y);
-        cvtColor(src, temp, cv::COLOR_BGR2BGRA);
+        const uchar* mask_row = mask.empty() ? 0 : mask.ptr<unsigned char>(y);
-        swap(src, temp);
+
        for (int x = 0; x < src.cols; ++x)
        {
            if (!mask_row || mask_row[x])
            {
                schar val = src_row[x];
                if (val < minVal)
                {
                    minVal = val;
                    minLoc = cv::Point(x, y);
                }
                if (val > maxVal)
                {
                    maxVal = val;
                    maxLoc = cv::Point(x, y);
                }
            }
        }
    }
-    src.convertTo(src, CV_MAT_DEPTH(type));
+
-    return src;
+    if (minVal_) *minVal_ = minVal;
    if (maxVal_) *maxVal_ = maxVal;
    if (minLoc_) *minLoc_ = minLoc;
    if (maxLoc_) *maxLoc_ = maxLoc;
 }
 namespace
 {
-    Mat getMat(InputArray arr)
+    template <typename T, typename OutT> string printMatValImpl(const Mat& m, Point p)
    {
-        if (arr.kind() == _InputArray::GPU_MAT)
+        const int cn = m.channels();
        ostringstream ostr;
        ostr << "(";
        p.x /= cn;
        ostr << static_cast<OutT>(m.at<T>(p.y, p.x * cn));
        for (int c = 1; c < m.channels(); ++c)
        {
-            Mat m;
+            ostr << ", " << static_cast<OutT>(m.at<T>(p.y, p.x * cn + c));
            arr.getGpuMat().download(m);
            return m;
        }
        ostr << ")";
        return ostr.str();
    }
    string printMatVal(const Mat& m, Point p)
    {
        typedef string (*func_t)(const Mat& m, Point p);
        static const func_t funcs[] =
        {
            printMatValImpl<uchar, int>, printMatValImpl<schar, int>, printMatValImpl<ushort, int>, printMatValImpl<short, int>,
            printMatValImpl<int, int>, printMatValImpl<float, float>, printMatValImpl<double, double>
        };
-        return arr.getMat();
+        return funcs[m.depth()](m, p);
    }
 }
-void showDiff(InputArray gold_, InputArray actual_, double eps)
+testing::AssertionResult assertMatNear(const char* expr1, const char* expr2, const char* eps_expr, cv::InputArray m1_, cv::InputArray m2_, double eps)
 {
-    Mat gold = getMat(gold_);
+    Mat m1 = getMat(m1_);
-    Mat actual = getMat(actual_);
+    Mat m2 = getMat(m2_);
-    Mat diff;
+    if (m1.size() != m2.size())
-    absdiff(gold, actual, diff);
+    {
-    threshold(diff, diff, eps, 255.0, cv::THRESH_BINARY);
+        return AssertionFailure() << "Matrices \"" << expr1 << "\" and \"" << expr2 << "\" have different sizes : \""
                                  << expr1 << "\" [" << PrintToString(m1.size()) << "] vs \""
                                  << expr2 << "\" [" << PrintToString(m2.size()) << "]";
    }
-    namedWindow("gold", WINDOW_NORMAL);
+    if (m1.type() != m2.type())
-    namedWindow("actual", WINDOW_NORMAL);
+    {
-    namedWindow("diff", WINDOW_NORMAL);
+        return AssertionFailure() << "Matrices \"" << expr1 << "\" and \"" << expr2 << "\" have different types : \""
                                  << expr1 << "\" [" << PrintToString(MatType(m1.type())) << "] vs \""
                                  << expr2 << "\" [" << PrintToString(MatType(m2.type())) << "]";
    }
-    imshow("gold", gold);
+    Mat diff;
-    imshow("actual", actual);
+    absdiff(m1.reshape(1), m2.reshape(1), diff);
    imshow("diff", diff);
-    waitKey();
+    double maxVal = 0.0;
-}
+    Point maxLoc;
    minMaxLocGold(diff, 0, &maxVal, 0, &maxLoc);
-double checkNorm(InputArray m1, const InputArray m2)
+    if (maxVal > eps)
-{
+    {
-    return norm(getMat(m1), getMat(m2), NORM_INF);
+        return AssertionFailure() << "The max difference between matrices \"" << expr1 << "\" and \"" << expr2
                                  << "\" is " << maxVal << " at (" << maxLoc.y << ", " << maxLoc.x / m1.channels() << ")"
                                  << ", which exceeds \"" << eps_expr << "\", where \""
                                  << expr1 << "\" at (" << maxLoc.y << ", " << maxLoc.x / m1.channels() << ") evaluates to " << printMatVal(m1, maxLoc) << ", \""
                                  << expr2 << "\" at (" << maxLoc.y << ", " << maxLoc.x / m1.channels() << ") evaluates to " << printMatVal(m2, maxLoc) << ", \""
                                  << eps_expr << "\" evaluates to " << eps;
    }
    return AssertionSuccess();
 }
 double checkSimilarity(InputArray m1, InputArray m2)
@ -239,6 +329,45 @@ double checkSimilarity(InputArray m1, InputArray m2)
    return std::abs(diff.at<float>(0, 0) - 1.f);
 }
 //////////////////////////////////////////////////////////////////////
 // Helper structs for value-parameterized tests
 // Returns every depth in the inclusive range [depth_start, depth_end] as a
 // list of MatDepth values, in ascending order.
 // An inverted range (depth_end < depth_start) yields an empty list; without
 // the guard the negative element count would be converted to a huge
 // unsigned value and make reserve() throw.
 vector<MatDepth> depths(int depth_start, int depth_end)
 {
    vector<MatDepth> v;
    if (depth_end < depth_start)
        return v;
    v.reserve(static_cast<size_t>(depth_end - depth_start + 1));
    for (int depth = depth_start; depth <= depth_end; ++depth)
        v.push_back(depth);
    return v;
 }
 // Returns CV_MAKETYPE(depth, cn) for every depth in [depth_start, depth_end]
 // and every channel count in [cn_start, cn_end] (both ranges inclusive),
 // ordered by depth first and channel count second.
 // If either range is inverted the result is empty; without the guard a
 // negative element count would be converted to a huge unsigned value and
 // make reserve() throw.
 vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end)
 {
    vector<MatType> v;
    if (depth_end < depth_start || cn_end < cn_start)
        return v;
    const size_t depthCount = static_cast<size_t>(depth_end - depth_start + 1);
    const size_t cnCount = static_cast<size_t>(cn_end - cn_start + 1);
    v.reserve(depthCount * cnCount);
    for (int depth = depth_start; depth <= depth_end; ++depth)
    {
        for (int cn = cn_start; cn <= cn_end; ++cn)
        {
            v.push_back(CV_MAKETYPE(depth, cn));
        }
    }
    return v;
 }
 // Returns the list of all types from CV_8U to CV_64F with 1 to 4 channels.
 // The list is built once, on first call, and shared by later calls.
 const vector<MatType>& all_types()
 {
    static const vector<MatType> allTypes(types(CV_8U, CV_64F, 1, 4));
    return allTypes;
 }
 void cv::gpu::PrintTo(const DeviceInfo& info, ostream* os)
 {
    (*os) << info.name();
@ -259,3 +388,23 @@ void PrintTo(const Inverse& inverse, std::ostream* os)
    else
        (*os) << "direct";
 }
 // Debug helper: shows the reference image, the actual image and a binary map
 // of the pixels whose absolute difference exceeds eps in three windows, then
 // blocks in waitKey() until a key is pressed.
 void showDiff(InputArray gold_, InputArray actual_, double eps)
 {
    const Mat goldMat = getMat(gold_);
    const Mat actualMat = getMat(actual_);

    // Absolute difference, thresholded in place to a 0 / 255 map.
    Mat diffMask;
    absdiff(goldMat, actualMat, diffMask);
    threshold(diffMask, diffMask, eps, 255.0, cv::THRESH_BINARY);

    const char* const windowNames[] = { "gold", "actual", "diff" };
    const Mat* const windowImages[] = { &goldMat, &actualMat, &diffMask };
    const int windowCount = 3;

    // Create all windows first, then fill them, in the same order.
    for (int i = 0; i < windowCount; ++i)
        namedWindow(windowNames[i], WINDOW_NORMAL);

    for (int i = 0; i < windowCount; ++i)
        imshow(windowNames[i], *windowImages[i]);

    waitKey();
 }
--- a/modules/gpu/test/utility.hpp
+++ b/modules/gpu/test/utility.hpp
@ -42,37 +42,66 @@
 #ifndef __OPENCV_TEST_UTILITY_HPP__
 #define __OPENCV_TEST_UTILITY_HPP__
 #include <vector>
 #include <string>
 #include "opencv2/core/core.hpp"
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/gpu/gpu.hpp"
 #include "opencv2/ts/ts.hpp"
 #include "opencv2/ts/ts_perf.hpp"
 //////////////////////////////////////////////////////////////////////
 // random generators
 int randomInt(int minVal, int maxVal);
 double randomDouble(double minVal, double maxVal);
 cv::Size randomSize(int minVal, int maxVal);
 cv::Scalar randomScalar(double minVal, double maxVal);
 cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0);
 //////////////////////////////////////////////////////////////////////
 // GpuMat create
 cv::gpu::GpuMat createMat(cv::Size size, int type, bool useRoi = false);
 cv::gpu::GpuMat loadMat(const cv::Mat& m, bool useRoi = false);
-void showDiff(cv::InputArray gold, cv::InputArray actual, double eps);
+//////////////////////////////////////////////////////////////////////
 // Image load
 //! read image from testdata folder
 cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
 //! read image from testdata folder and convert it to specified type
 cv::Mat readImageType(const std::string& fname, int type);
 //////////////////////////////////////////////////////////////////////
 // Gpu devices
 //! return true if the device supports the specified feature and the gpu module was built with support for that feature.
 bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
 //! return all devices compatible with current gpu module build.
 const std::vector<cv::gpu::DeviceInfo>& devices();
 //! return all devices compatible with current gpu module build which support specified feature.
 std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature);
-//! read image from testdata folder.
+#define ALL_DEVICES testing::ValuesIn(devices())
-cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
+#define DEVICES(feature) testing::ValuesIn(devices(feature))
-cv::Mat readImageType(const std::string& fname, int type);
+
 //////////////////////////////////////////////////////////////////////
 // Additional assertion
 cv::Mat getMat(cv::InputArray arr);
 double checkNorm(cv::InputArray m1, cv::InputArray m2);
-#define EXPECT_MAT_NEAR(mat1, mat2, eps) \
+void minMaxLocGold(const cv::Mat& src, double* minVal_, double* maxVal_ = 0, cv::Point* minLoc_ = 0, cv::Point* maxLoc_ = 0, const cv::Mat& mask = cv::Mat());
-    { \
+
-        ASSERT_EQ(mat1.type(), mat2.type()); \
+testing::AssertionResult assertMatNear(const char* expr1, const char* expr2, const char* eps_expr, cv::InputArray m1, cv::InputArray m2, double eps);
-        ASSERT_EQ(mat1.size(), mat2.size()); \
+
-        EXPECT_LE(checkNorm(mat1, mat2), eps); \
+#define EXPECT_MAT_NEAR(m1, m2, eps) EXPECT_PRED_FORMAT3(assertMatNear, m1, m2, eps)
-    }
+#define ASSERT_MAT_NEAR(m1, m2, eps) ASSERT_PRED_FORMAT3(assertMatNear, m1, m2, eps)
 #define EXPECT_SCALAR_NEAR(s1, s2, eps) \
    { \
@ -81,6 +110,37 @@ double checkNorm(cv::InputArray m1, cv::InputArray m2);
        EXPECT_NEAR(s1[2], s2[2], eps); \
        EXPECT_NEAR(s1[3], s2[3], eps); \
    }
 //! Applies ASSERT_NEAR to elements [0]..[3] of s1 and s2 with tolerance eps.
 //! Expands to a braced block; s1, s2 and eps are each evaluated four times.
 #define ASSERT_SCALAR_NEAR(s1, s2, eps) \
    { \
        ASSERT_NEAR(s1[0], s2[0], eps); \
        ASSERT_NEAR(s1[1], s2[1], eps); \
        ASSERT_NEAR(s1[2], s2[2], eps); \
        ASSERT_NEAR(s1[3], s2[3], eps); \
    }
 //! Applies EXPECT_NEAR to the x and y members of p1 and p2 with tolerance eps.
 //! Expands to a braced block; p1, p2 and eps are each evaluated twice.
 #define EXPECT_POINT2_NEAR(p1, p2, eps) \
    { \
        EXPECT_NEAR(p1.x, p2.x, eps); \
        EXPECT_NEAR(p1.y, p2.y, eps); \
    }
 //! Applies ASSERT_NEAR to the x and y members of p1 and p2 with tolerance eps.
 //! Expands to a braced block; p1, p2 and eps are each evaluated twice.
 #define ASSERT_POINT2_NEAR(p1, p2, eps) \
    { \
        ASSERT_NEAR(p1.x, p2.x, eps); \
        ASSERT_NEAR(p1.y, p2.y, eps); \
    }
 //! Applies EXPECT_NEAR to the x, y and z members of p1 and p2 with tolerance eps.
 //! Expands to a braced block; p1, p2 and eps are each evaluated three times.
 #define EXPECT_POINT3_NEAR(p1, p2, eps) \
    { \
        EXPECT_NEAR(p1.x, p2.x, eps); \
        EXPECT_NEAR(p1.y, p2.y, eps); \
        EXPECT_NEAR(p1.z, p2.z, eps); \
    }
 //! Applies ASSERT_NEAR to the x, y and z members of p1 and p2 with tolerance eps.
 //! Expands to a braced block; p1, p2 and eps are each evaluated three times.
 #define ASSERT_POINT3_NEAR(p1, p2, eps) \
    { \
        ASSERT_NEAR(p1.x, p2.x, eps); \
        ASSERT_NEAR(p1.y, p2.y, eps); \
        ASSERT_NEAR(p1.z, p2.z, eps); \
    }
 double checkSimilarity(cv::InputArray m1, cv::InputArray m2);
@ -90,13 +150,63 @@ double checkSimilarity(cv::InputArray m1, cv::InputArray m2);
        ASSERT_EQ(mat1.size(), mat2.size()); \
        EXPECT_LE(checkSimilarity(mat1, mat2), eps); \
    }
 //! Asserts that mat1 and mat2 have equal type() and size(), then asserts that
 //! checkSimilarity(mat1, mat2) is less than or equal to eps.
 //! Expands to a braced block; mat1 and mat2 are each evaluated three times.
 #define ASSERT_MAT_SIMILAR(mat1, mat2, eps) \
    { \
        ASSERT_EQ(mat1.type(), mat2.type()); \
        ASSERT_EQ(mat1.size(), mat2.size()); \
        ASSERT_LE(checkSimilarity(mat1, mat2), eps); \
    }
 //////////////////////////////////////////////////////////////////////
 // Helper structs for value-parameterized tests
 #define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
 #define GET_PARAM(k) std::tr1::get< k >(GetParam())
 namespace cv { namespace gpu
 {
    void PrintTo(const DeviceInfo& info, std::ostream* os);
 }}
 #define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113))
 // Depth
 using perf::MatDepth;
 //! return vector with depths from specified range.
 std::vector<MatDepth> depths(int depth_start, int depth_end);
 #define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F), MatDepth(CV_64F))
 #define DEPTHS(depth_start, depth_end) testing::ValuesIn(depths(depth_start, depth_end))
 #define DEPTH_PAIRS testing::Values(std::make_pair(MatDepth(CV_8U), MatDepth(CV_8U)),   \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_16U)),  \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_16S)),  \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_32S)),  \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_32F)),  \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_64F)),  \
                                                                                        \
                                    std::make_pair(MatDepth(CV_16U), MatDepth(CV_16U)), \
                                    std::make_pair(MatDepth(CV_16U), MatDepth(CV_32S)), \
                                    std::make_pair(MatDepth(CV_16U), MatDepth(CV_32F)), \
                                    std::make_pair(MatDepth(CV_16U), MatDepth(CV_64F)), \
                                                                                        \
                                    std::make_pair(MatDepth(CV_16S), MatDepth(CV_16S)), \
                                    std::make_pair(MatDepth(CV_16S), MatDepth(CV_32S)), \
                                    std::make_pair(MatDepth(CV_16S), MatDepth(CV_32F)), \
                                    std::make_pair(MatDepth(CV_16S), MatDepth(CV_64F)), \
                                                                                        \
                                    std::make_pair(MatDepth(CV_32S), MatDepth(CV_32S)), \
                                    std::make_pair(MatDepth(CV_32S), MatDepth(CV_32F)), \
                                    std::make_pair(MatDepth(CV_32S), MatDepth(CV_64F)), \
                                                                                        \
                                    std::make_pair(MatDepth(CV_32F), MatDepth(CV_32F)), \
                                    std::make_pair(MatDepth(CV_32F), MatDepth(CV_64F)), \
                                                                                        \
                                    std::make_pair(MatDepth(CV_64F), MatDepth(CV_64F)))
 // Type
 using perf::MatType;
 //! return vector with types from specified range.
@ -105,6 +215,11 @@ std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_
 //! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
 const std::vector<MatType>& all_types();
 #define ALL_TYPES testing::ValuesIn(all_types())
 #define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end))
 // ROI
 class UseRoi
 {
 public:
@ -115,11 +230,15 @@ public:
 private:
    bool val_;
 };
 void PrintTo(const UseRoi& useRoi, std::ostream* os);
 #define WHOLE testing::Values(UseRoi(false))
 #define SUBMAT testing::Values(UseRoi(true))
 #define WHOLE_SUBMAT testing::Values(UseRoi(false), UseRoi(true))
 // Direct/Inverse
 class Inverse
 {
 public:
@ -133,75 +252,41 @@ private:
 void PrintTo(const Inverse& useRoi, std::ostream* os);
 #define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true))
-CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
+// Param class
 #define ALL_CMP_CODES testing::Values(CmpCode(cv::CMP_EQ), CmpCode(cv::CMP_NE), CmpCode(cv::CMP_GT), CmpCode(cv::CMP_GE), CmpCode(cv::CMP_LT), CmpCode(cv::CMP_LE))
 CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
 enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
 CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
 #define ALL_FLIP_CODES testing::Values(FlipCode(FLIP_BOTH), FlipCode(FLIP_X), FlipCode(FLIP_Y))
-CV_ENUM(ReduceCode, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
+#define IMPLEMENT_PARAM_CLASS(name, type) \
-#define ALL_REDUCE_CODES testing::Values(ReduceCode(CV_REDUCE_SUM), ReduceCode(CV_REDUCE_AVG), ReduceCode(CV_REDUCE_MAX), ReduceCode(CV_REDUCE_MIN))
+    class name \
    { \
    public: \
        name ( type arg = type ()) : val_(arg) {} \
        operator type () const {return val_;} \
    private: \
        type val_; \
    }; \
    inline void PrintTo( name param, std::ostream* os) \
    { \
        *os << #name <<  "(" << static_cast< type >(param) << ")"; \
    }
-CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
+IMPLEMENT_PARAM_CLASS(Channels, int)
 #define ALL_GEMM_FLAGS testing::Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
-CV_ENUM(DistType, cv::gpu::BruteForceMatcher_GPU_base::L1Dist, cv::gpu::BruteForceMatcher_GPU_base::L2Dist)
+#define ALL_CHANNELS testing::Values(Channels(1), Channels(2), Channels(3), Channels(4))
 #define IMAGE_CHANNELS testing::Values(Channels(1), Channels(3), Channels(4))
-CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
+// Flags and enums
-CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
+CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
 #define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))
 CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
-CV_ENUM(Border, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
+CV_ENUM(BorderType, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
 #define ALL_BORDER_TYPES testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP))
 CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP)
-CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
+//////////////////////////////////////////////////////////////////////
-
+// Other
 CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
 #define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
 #define GET_PARAM(k) std::tr1::get< k >(GetParam())
 #define ALL_DEVICES testing::ValuesIn(devices())
 #define DEVICES(feature) testing::ValuesIn(devices(feature))
 #define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113))
 #define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F), MatDepth(CV_64F))
 #define ALL_TYPES testing::ValuesIn(all_types())
 #define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end))
-#define DEPTH_PAIRS testing::Values(std::make_pair(MatDepth(CV_8U), MatDepth(CV_8U)),   \
+void showDiff(cv::InputArray gold, cv::InputArray actual, double eps);
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_16U)),  \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_16S)),  \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_32S)),  \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_32F)),  \
                                    std::make_pair(MatDepth(CV_8U), MatDepth(CV_64F)),  \
                                                                                        \
                                    std::make_pair(MatDepth(CV_16U), MatDepth(CV_16U)), \
                                    std::make_pair(MatDepth(CV_16U), MatDepth(CV_32S)), \
                                    std::make_pair(MatDepth(CV_16U), MatDepth(CV_32F)), \
                                    std::make_pair(MatDepth(CV_16U), MatDepth(CV_64F)), \
                                                                                        \
                                    std::make_pair(MatDepth(CV_16S), MatDepth(CV_16S)), \
                                    std::make_pair(MatDepth(CV_16S), MatDepth(CV_32S)), \
                                    std::make_pair(MatDepth(CV_16S), MatDepth(CV_32F)), \
                                    std::make_pair(MatDepth(CV_16S), MatDepth(CV_64F)), \
                                                                                        \
                                    std::make_pair(MatDepth(CV_32S), MatDepth(CV_32S)), \
                                    std::make_pair(MatDepth(CV_32S), MatDepth(CV_32F)), \
                                    std::make_pair(MatDepth(CV_32S), MatDepth(CV_64F)), \
                                                                                        \
                                    std::make_pair(MatDepth(CV_32F), MatDepth(CV_32F)), \
                                    std::make_pair(MatDepth(CV_32F), MatDepth(CV_64F)), \
                                                                                        \
                                    std::make_pair(MatDepth(CV_64F), MatDepth(CV_64F)))
 #endif // __OPENCV_TEST_UTILITY_HPP__