gpufilters module for image filtering

12 years ago · 84de6ce036
parent 31c8b527c6
commit 84de6ce036
56 changed files with 995 additions and 379 deletions
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@ -3,7 +3,7 @@ if(ANDROID OR IOS)
 endif()

 set(the_description "GPU-accelerated Computer Vision")
-ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm)
+ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm opencv_gpufilters)

 ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")

--- a/modules/gpu/doc/gpu.rst
+++ b/modules/gpu/doc/gpu.rst
@ -11,6 +11,5 @@ gpu. GPU-accelerated Computer Vision
    image_processing
    object_detection
    feature_detection_and_description
-    image_filtering
    camera_calibration_and_3d_reconstruction
    video
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@ -51,225 +51,12 @@

 #include "opencv2/core/gpumat.hpp"
 #include "opencv2/gpuarithm.hpp"
+#include "opencv2/gpufilters.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/objdetect.hpp"
 #include "opencv2/features2d.hpp"

 namespace cv { namespace gpu {
-//////////////////////////////// Filter Engine ////////////////////////////////
-
-/*!
-The Base Class for 1D or Row-wise Filters
-
-This is the base class for linear or non-linear filters that process 1D data.
-In particular, such filters are used for the "horizontal" filtering parts in separable filters.
-*/
-class CV_EXPORTS BaseRowFilter_GPU
-{
-public:
-    BaseRowFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
-    virtual ~BaseRowFilter_GPU() {}
-    virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
-    int ksize, anchor;
-};
-
-/*!
-The Base Class for Column-wise Filters
-
-This is the base class for linear or non-linear filters that process columns of 2D arrays.
-Such filters are used for the "vertical" filtering parts in separable filters.
-*/
-class CV_EXPORTS BaseColumnFilter_GPU
-{
-public:
-    BaseColumnFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
-    virtual ~BaseColumnFilter_GPU() {}
-    virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
-    int ksize, anchor;
-};
-
-/*!
-The Base Class for Non-Separable 2D Filters.
-
-This is the base class for linear or non-linear 2D filters.
-*/
-class CV_EXPORTS BaseFilter_GPU
-{
-public:
-    BaseFilter_GPU(const Size& ksize_, const Point& anchor_) : ksize(ksize_), anchor(anchor_) {}
-    virtual ~BaseFilter_GPU() {}
-    virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
-    Size ksize;
-    Point anchor;
-};
-
-/*!
-The Base Class for Filter Engine.
-
-The class can be used to apply an arbitrary filtering operation to an image.
-It contains all the necessary intermediate buffers.
-*/
-class CV_EXPORTS FilterEngine_GPU
-{
-public:
-    virtual ~FilterEngine_GPU() {}
-
-    virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) = 0;
-};
-
-//! returns the non-separable filter engine with the specified filter
-CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU>& filter2D, int srcType, int dstType);
-
-//! returns the separable filter engine with the specified filters
-CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
-    const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType);
-CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
-    const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType, GpuMat& buf);
-
-//! returns horizontal 1D box filter
-//! supports only CV_8UC1 source type and CV_32FC1 sum type
-CV_EXPORTS Ptr<BaseRowFilter_GPU> getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor = -1);
-
-//! returns vertical 1D box filter
-//! supports only CV_8UC1 sum type and CV_32FC1 dst type
-CV_EXPORTS Ptr<BaseColumnFilter_GPU> getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor = -1);
-
-//! returns 2D box filter
-//! supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
-CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1, -1));
-
-//! returns box filter engine
-CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size& ksize,
-    const Point& anchor = Point(-1,-1));
-
-//! returns 2D morphological filter
-//! only MORPH_ERODE and MORPH_DILATE are supported
-//! supports CV_8UC1 and CV_8UC4 types
-//! kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height
-CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize,
-    Point anchor=Point(-1,-1));
-
-//! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
-CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel,
-    const Point& anchor = Point(-1,-1), int iterations = 1);
-CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf,
-    const Point& anchor = Point(-1,-1), int iterations = 1);
-
-//! returns 2D filter with the specified kernel
-//! supports CV_8U, CV_16U and CV_32F one and four channel image
-CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
-
-//! returns the non-separable linear filter engine
-CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel,
-    Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT);
-
-//! returns the primitive row filter with the specified kernel.
-//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type.
-//! there are two version of algorithm: NPP and OpenCV.
-//! NPP calls when srcType == CV_8UC1 or srcType == CV_8UC4 and bufType == srcType,
-//! otherwise calls OpenCV version.
-//! NPP supports only BORDER_CONSTANT border type.
-//! OpenCV version supports only CV_32F as buffer depth and
-//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
-CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel,
-    int anchor = -1, int borderType = BORDER_DEFAULT);
-
-//! returns the primitive column filter with the specified kernel.
-//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 dst type.
-//! there are two version of algorithm: NPP and OpenCV.
-//! NPP calls when dstType == CV_8UC1 or dstType == CV_8UC4 and bufType == dstType,
-//! otherwise calls OpenCV version.
-//! NPP supports only BORDER_CONSTANT border type.
-//! OpenCV version supports only CV_32F as buffer depth and
-//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
-CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel,
-    int anchor = -1, int borderType = BORDER_DEFAULT);
-
-//! returns the separable linear filter engine
-CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
-    const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
-    int columnBorderType = -1);
-CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
-    const Mat& columnKernel, GpuMat& buf, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
-    int columnBorderType = -1);
-
-//! returns filter engine for the generalized Sobel operator
-CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize,
-                                                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, GpuMat& buf,
-                                                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-
-//! returns the Gaussian filter engine
-CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0,
-                                                          int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
-                                                          int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-
-//! returns maximum filter
-CV_EXPORTS Ptr<BaseFilter_GPU> getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
-
-//! returns minimum filter
-CV_EXPORTS Ptr<BaseFilter_GPU> getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
-
-//! smooths the image using the normalized box filter
-//! supports CV_8UC1, CV_8UC4 types
-CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null());
-
-//! a synonym for normalized box filter
-static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null())
-{
-    boxFilter(src, dst, -1, ksize, anchor, stream);
-}
-
-//! erodes the image (applies the local minimum operator)
-CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
-CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf,
-                      Point anchor = Point(-1, -1), int iterations = 1,
-                      Stream& stream = Stream::Null());
-
-//! dilates the image (applies the local maximum operator)
-CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
-CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf,
-                       Point anchor = Point(-1, -1), int iterations = 1,
-                       Stream& stream = Stream::Null());
-
-//! applies an advanced morphological operation to the image
-CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
-CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2,
-                             Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
-
-//! applies non-separable 2D linear filter to the image
-CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
-
-//! applies separable 2D linear filter to the image
-CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
-                            Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf,
-                            Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1,
-                            Stream& stream = Stream::Null());
-
-//! applies generalized Sobel operator to the image
-CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1,
-                      int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, int ksize = 3, double scale = 1,
-                      int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
-
-//! applies the vertical or horizontal Scharr operator to the image
-CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1,
-                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, double scale = 1,
-                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
-
-//! smooths the image using Gaussian filter.
-CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0,
-                             int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
-CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
-                             int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
-
-//! applies Laplacian operator to the image
-//! supports only ksize = 1 and ksize = 3
-CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
-
 ////////////////////////////// Image processing //////////////////////////////


--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@ -895,112 +895,6 @@ namespace cv { namespace gpu { namespace cudev
            if (stream == 0)
                cudaSafeCall(cudaDeviceSynchronize());
        }
-
-        //////////////////////////////////////////////////////////////////////////
-        // filter2D
-
-        #define FILTER2D_MAX_KERNEL_SIZE 16
-
-        __constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE];
-
-        template <class SrcT, typename D>
-        __global__ void filter2D(const SrcT src, PtrStepSz<D> dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY)
-        {
-            typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t;
-
-            const int x = blockIdx.x * blockDim.x + threadIdx.x;
-            const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-            if (x >= dst.cols || y >= dst.rows)
-                return;
-
-            sum_t res = VecTraits<sum_t>::all(0);
-            int kInd = 0;
-
-            for (int i = 0; i < kHeight; ++i)
-            {
-                for (int j = 0; j < kWidth; ++j)
-                    res = res + src(y - anchorY + i, x - anchorX + j) * c_filter2DKernel[kInd++];
-            }
-
-            dst(y, x) = saturate_cast<D>(res);
-        }
-
-        template <typename T, typename D, template <typename> class Brd> struct Filter2DCaller;
-
-        #define IMPLEMENT_FILTER2D_TEX_READER(type) \
-            texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
-            struct tex_filter2D_ ## type ## _reader \
-            { \
-                typedef type elem_type; \
-                typedef int index_type; \
-                const int xoff; \
-                const int yoff; \
-                tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
-                __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
-                { \
-                    return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \
-                } \
-            }; \
-            template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
-            { \
-                static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz<D> dst, \
-                    int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
-                { \
-                    typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
-                    dim3 block(16, 16); \
-                    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
-                    bindTexture(&tex_filter2D_ ## type , srcWhole); \
-                    tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \
-                    Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
-                    BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \
-                    filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kWidth, kHeight, anchorX, anchorY); \
-                    cudaSafeCall( cudaGetLastError() ); \
-                    if (stream == 0) \
-                        cudaSafeCall( cudaDeviceSynchronize() ); \
-                } \
-            };
-
-        IMPLEMENT_FILTER2D_TEX_READER(uchar);
-        IMPLEMENT_FILTER2D_TEX_READER(uchar4);
-
-        IMPLEMENT_FILTER2D_TEX_READER(ushort);
-        IMPLEMENT_FILTER2D_TEX_READER(ushort4);
-
-        IMPLEMENT_FILTER2D_TEX_READER(float);
-        IMPLEMENT_FILTER2D_TEX_READER(float4);
-
-        #undef IMPLEMENT_FILTER2D_TEX_READER
-
-        template <typename T, typename D>
-        void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
-                          int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
-                          int borderMode, const float* borderValue, cudaStream_t stream)
-        {
-            typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
-            static const func_t funcs[] =
-            {
-                Filter2DCaller<T, D, BrdReflect101>::call,
-                Filter2DCaller<T, D, BrdReplicate>::call,
-                Filter2DCaller<T, D, BrdConstant>::call,
-                Filter2DCaller<T, D, BrdReflect>::call,
-                Filter2DCaller<T, D, BrdWrap>::call
-            };
-
-            if (stream == 0)
-                cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
-            else
-                cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
-
-            funcs[borderMode](static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
-        }
-
-        template void filter2D_gpu<uchar, uchar>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
-        template void filter2D_gpu<uchar4, uchar4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
-        template void filter2D_gpu<ushort, ushort>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
-        template void filter2D_gpu<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
-        template void filter2D_gpu<float, float>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
-        template void filter2D_gpu<float4, float4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
    } // namespace imgproc
 }}} // namespace cv { namespace gpu { namespace cudev {

--- a/modules/gpufilters/CMakeLists.txt
+++ b/modules/gpufilters/CMakeLists.txt
@ -0,0 +1,9 @@
+if(ANDROID OR IOS)
+  ocv_module_disable(gpufilters)
+endif()
+
+set(the_description "GPU-accelerated Image Filtering")
+
+ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations)
+
+ocv_define_module(gpufilters opencv_imgproc OPTIONAL opencv_gpuarithm)
--- a/modules/gpufilters/doc/filtering.rst
+++ b/modules/gpufilters/doc/filtering.rst
--- a/modules/gpufilters/doc/gpufilters.rst
+++ b/modules/gpufilters/doc/gpufilters.rst
@ -0,0 +1,8 @@
+*******************************************
+gpufilters. GPU-accelerated Image Filtering
+*******************************************
+
+.. toctree::
+    :maxdepth: 1
+
+    filtering
--- a/modules/gpufilters/include/opencv2/gpufilters.hpp
+++ b/modules/gpufilters/include/opencv2/gpufilters.hpp
@ -0,0 +1,269 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPUFILTERS_HPP__
+#define __OPENCV_GPUFILTERS_HPP__
+
+#ifndef __cplusplus
+#  error gpufilters.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/gpumat.hpp"
+#include "opencv2/core/base.hpp"
+
+namespace cv { namespace gpu {
+
+/*!
+The Base Class for 1D or Row-wise Filters
+
+This is the base class for linear or non-linear filters that process 1D data.
+In particular, such filters are used for the "horizontal" filtering parts in separable filters.
+*/
+class CV_EXPORTS BaseRowFilter_GPU
+{
+public:
+    BaseRowFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
+    virtual ~BaseRowFilter_GPU() {}
+    virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
+    int ksize, anchor;
+};
+
+/*!
+The Base Class for Column-wise Filters
+
+This is the base class for linear or non-linear filters that process columns of 2D arrays.
+Such filters are used for the "vertical" filtering parts in separable filters.
+*/
+class CV_EXPORTS BaseColumnFilter_GPU
+{
+public:
+    BaseColumnFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {}
+    virtual ~BaseColumnFilter_GPU() {}
+    virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
+    int ksize, anchor;
+};
+
+/*!
+The Base Class for Non-Separable 2D Filters.
+
+This is the base class for linear or non-linear 2D filters.
+*/
+class CV_EXPORTS BaseFilter_GPU
+{
+public:
+    BaseFilter_GPU(const Size& ksize_, const Point& anchor_) : ksize(ksize_), anchor(anchor_) {}
+    virtual ~BaseFilter_GPU() {}
+    virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0;
+    Size ksize;
+    Point anchor;
+};
+
+/*!
+The Base Class for Filter Engine.
+
+The class can be used to apply an arbitrary filtering operation to an image.
+It contains all the necessary intermediate buffers.
+*/
+class CV_EXPORTS FilterEngine_GPU
+{
+public:
+    virtual ~FilterEngine_GPU() {}
+
+    virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) = 0;
+};
+
+//! returns the non-separable filter engine with the specified filter
+CV_EXPORTS Ptr<FilterEngine_GPU> createFilter2D_GPU(const Ptr<BaseFilter_GPU>& filter2D, int srcType, int dstType);
+
+//! returns the separable filter engine with the specified filters
+CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
+    const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType);
+CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
+    const Ptr<BaseColumnFilter_GPU>& columnFilter, int srcType, int bufType, int dstType, GpuMat& buf);
+
+//! returns horizontal 1D box filter
+//! supports only CV_8UC1 source type and CV_32FC1 sum type
+CV_EXPORTS Ptr<BaseRowFilter_GPU> getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor = -1);
+
+//! returns vertical 1D box filter
+//! supports only CV_8UC1 sum type and CV_32FC1 dst type
+CV_EXPORTS Ptr<BaseColumnFilter_GPU> getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor = -1);
+
+//! returns 2D box filter
+//! supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type
+CV_EXPORTS Ptr<BaseFilter_GPU> getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1, -1));
+
+//! returns box filter engine
+CV_EXPORTS Ptr<FilterEngine_GPU> createBoxFilter_GPU(int srcType, int dstType, const Size& ksize,
+    const Point& anchor = Point(-1,-1));
+
+//! returns 2D morphological filter
+//! only MORPH_ERODE and MORPH_DILATE are supported
+//! supports CV_8UC1 and CV_8UC4 types
+//! kernel must have CV_8UC1 type, one row and cols == ksize.width * ksize.height
+CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize,
+    Point anchor=Point(-1,-1));
+
+//! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
+CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel,
+    const Point& anchor = Point(-1,-1), int iterations = 1);
+CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf,
+    const Point& anchor = Point(-1,-1), int iterations = 1);
+
+//! returns 2D filter with the specified kernel
+//! supports one- and four-channel images of CV_8U, CV_16U and CV_32F depth
+CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+
+//! returns the non-separable linear filter engine
+CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel,
+    Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT);
+
+//! returns the primitive row filter with the specified kernel.
+//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type.
+//! there are two versions of the algorithm: NPP and OpenCV.
+//! the NPP version is called when srcType == CV_8UC1 or srcType == CV_8UC4 and bufType == srcType,
+//! otherwise the OpenCV version is called.
+//! NPP supports only BORDER_CONSTANT border type.
+//! OpenCV version supports only CV_32F as buffer depth and
+//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
+CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel,
+    int anchor = -1, int borderType = BORDER_DEFAULT);
+
+//! returns the primitive column filter with the specified kernel.
+//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 dst type.
+//! there are two versions of the algorithm: NPP and OpenCV.
+//! the NPP version is called when dstType == CV_8UC1 or dstType == CV_8UC4 and bufType == dstType,
+//! otherwise the OpenCV version is called.
+//! NPP supports only BORDER_CONSTANT border type.
+//! OpenCV version supports only CV_32F as buffer depth and
+//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types.
+CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel,
+    int anchor = -1, int borderType = BORDER_DEFAULT);
+
+//! returns the separable linear filter engine
+CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
+    const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
+    int columnBorderType = -1);
+CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
+    const Mat& columnKernel, GpuMat& buf, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT,
+    int columnBorderType = -1);
+
+//! returns filter engine for the generalized Sobel operator
+CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize,
+                                                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, GpuMat& buf,
+                                                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+
+//! returns the Gaussian filter engine
+CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0,
+                                                          int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
+                                                          int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+
+//! returns maximum filter
+CV_EXPORTS Ptr<BaseFilter_GPU> getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
+
+//! returns minimum filter
+CV_EXPORTS Ptr<BaseFilter_GPU> getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
+
+//! smooths the image using the normalized box filter (blur() forwards here with ddepth = -1)
+//! supports the CV_8UC1 and CV_8UC4 types
+CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null());
+
+//! a synonym for the normalized box filter: forwards every argument to boxFilter with ddepth = -1
+static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null())
+{
+    boxFilter(src, dst, -1, ksize, anchor, stream);
+}
+
+//! erodes the image (applies the local minimum operator); only the overload taking a caller-provided buf accepts a Stream
+CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
+CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf,
+                      Point anchor = Point(-1, -1), int iterations = 1,
+                      Stream& stream = Stream::Null());
+
+//! dilates the image (applies the local maximum operator); only the overload taking a caller-provided buf accepts a Stream
+CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
+CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf,
+                       Point anchor = Point(-1, -1), int iterations = 1,
+                       Stream& stream = Stream::Null());
+
+//! applies an advanced morphological operation (selected by op) to the image; only the overload taking caller-provided buf1 and buf2 accepts a Stream
+CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
+CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2,
+                             Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
+
+//! applies a non-separable 2D linear filter, given by kernel, to the image
+CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
+
+//! applies a separable 2D linear filter (kernelX, kernelY) to the image; only the overload taking a caller-provided buf accepts a Stream
+CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
+                            Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf,
+                            Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1,
+                            Stream& stream = Stream::Null());
+
+//! applies the generalized Sobel operator to the image; only the overload taking a caller-provided buf accepts a Stream (buf precedes ksize there)
+CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1,
+                      int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, int ksize = 3, double scale = 1,
+                      int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+//! applies the vertical or horizontal Scharr operator to the image; only the overload taking a caller-provided buf accepts a Stream (buf precedes scale there)
+CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1,
+                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, double scale = 1,
+                       int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+//! smooths the image using a Gaussian filter; only the overload taking a caller-provided buf accepts a Stream (buf precedes sigma1 there)
+CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0,
+                             int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
+CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
+                             int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
+
+//! applies the Laplacian operator to the image
+//! supports only ksize = 1 and ksize = 3 (ksize defaults to 1)
+CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
+
+}} // namespace cv { namespace gpu {
+
+#endif /* __OPENCV_GPUFILTERS_HPP__ */
--- a/modules/gpufilters/perf/perf_filters.cpp
+++ b/modules/gpufilters/perf/perf_filters.cpp
@ -51,7 +51,7 @@ using namespace perf;

 DEF_PARAM_TEST(Sz_Type_KernelSz, cv::Size, MatType, int);

-PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur,
+PERF_TEST_P(Sz_Type_KernelSz, Blur,
            Combine(GPU_TYPICAL_MAT_SIZES,
                    Values(CV_8UC1, CV_8UC4),
                    Values(3, 5, 7)))
@ -87,7 +87,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur,
 //////////////////////////////////////////////////////////////////////
 // Sobel

-PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
+PERF_TEST_P(Sz_Type_KernelSz, Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
 {
    declare.time(20.0);

@ -121,7 +121,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Valu
 //////////////////////////////////////////////////////////////////////
 // Scharr

-PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1)))
+PERF_TEST_P(Sz_Type, Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1)))
 {
    declare.time(20.0);

@ -154,7 +154,7 @@ PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U
 //////////////////////////////////////////////////////////////////////
 // GaussianBlur

-PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
+PERF_TEST_P(Sz_Type_KernelSz, GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
 {
    declare.time(20.0);

@ -188,7 +188,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZE
 //////////////////////////////////////////////////////////////////////
 // Laplacian

-PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3)))
+PERF_TEST_P(Sz_Type_KernelSz, Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3)))
 {
    declare.time(20.0);

@ -221,7 +221,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES,
 //////////////////////////////////////////////////////////////////////
 // Erode

-PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
+PERF_TEST_P(Sz_Type, Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
 {
    declare.time(20.0);

@ -256,7 +256,7 @@ PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC
 //////////////////////////////////////////////////////////////////////
 // Dilate

-PERF_TEST_P(Sz_Type, Filters_Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
+PERF_TEST_P(Sz_Type, Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
 {
    declare.time(20.0);

@ -295,7 +295,7 @@ CV_ENUM(MorphOp, MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BL

 DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, MorphOp);

-PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), MorphOp::all()))
+PERF_TEST_P(Sz_Type_Op, MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), MorphOp::all()))
 {
    declare.time(20.0);

@ -332,7 +332,7 @@ PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Val
 //////////////////////////////////////////////////////////////////////
 // Filter2D

-PERF_TEST_P(Sz_Type_KernelSz, Filters_Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
+PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
 {
    declare.time(20.0);

--- a/modules/gpufilters/perf/perf_main.cpp
+++ b/modules/gpufilters/perf/perf_main.cpp
@ -0,0 +1,47 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
+
+using namespace perf;
+
+CV_PERF_TEST_MAIN(gpufilters, printCudaInfo())
--- a/modules/gpufilters/perf/perf_precomp.cpp
+++ b/modules/gpufilters/perf/perf_precomp.cpp
@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "perf_precomp.hpp"
--- a/modules/gpufilters/perf/perf_precomp.hpp
+++ b/modules/gpufilters/perf/perf_precomp.hpp
@ -0,0 +1,64 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_perf.hpp"
+
+#include "opencv2/gpufilters.hpp"
+#include "opencv2/imgproc.hpp"
+
+#ifdef GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif
+
+#endif
--- a/modules/gpufilters/src/cuda/column_filter.16sc1.cu
+++ b/modules/gpufilters/src/cuda/column_filter.16sc1.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.16sc3.cu
+++ b/modules/gpufilters/src/cuda/column_filter.16sc3.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.16sc4.cu
+++ b/modules/gpufilters/src/cuda/column_filter.16sc4.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.16uc1.cu
+++ b/modules/gpufilters/src/cuda/column_filter.16uc1.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.16uc3.cu
+++ b/modules/gpufilters/src/cuda/column_filter.16uc3.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.16uc4.cu
+++ b/modules/gpufilters/src/cuda/column_filter.16uc4.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.32fc1.cu
+++ b/modules/gpufilters/src/cuda/column_filter.32fc1.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.32fc3.cu
+++ b/modules/gpufilters/src/cuda/column_filter.32fc3.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.32fc4.cu
+++ b/modules/gpufilters/src/cuda/column_filter.32fc4.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.32sc1.cu
+++ b/modules/gpufilters/src/cuda/column_filter.32sc1.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.32sc3.cu
+++ b/modules/gpufilters/src/cuda/column_filter.32sc3.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.32sc4.cu
+++ b/modules/gpufilters/src/cuda/column_filter.32sc4.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.8uc1.cu
+++ b/modules/gpufilters/src/cuda/column_filter.8uc1.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.8uc3.cu
+++ b/modules/gpufilters/src/cuda/column_filter.8uc3.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.8uc4.cu
+++ b/modules/gpufilters/src/cuda/column_filter.8uc4.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "column_filter.h"
+#include "column_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/column_filter.hpp
+++ b/modules/gpufilters/src/cuda/column_filter.hpp
--- a/modules/gpufilters/src/cuda/filter2d.cu
+++ b/modules/gpufilters/src/cuda/filter2d.cu
@ -0,0 +1,158 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/saturate_cast.hpp"
+#include "opencv2/core/cuda/border_interpolate.hpp"
+
+namespace cv { namespace gpu { namespace cudev
+{
+    namespace imgproc
+    {
+        #define FILTER2D_MAX_KERNEL_SIZE 16 // side length used to size c_filter2DKernel (MAX x MAX floats)
+        // Filter coefficients in constant memory; filter2D_gpu copies kWidth * kHeight floats here before launching the kernel.
+        __constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE];
+        /* Direct 2D filtering kernel: each thread computes one pixel of dst.
+         *
+         * src              - callable as src(row, col); in this file it is a BorderReader wrapping a texture reader
+         * dst              - output image; its cols/rows bound the threads that do work
+         * kWidth, kHeight  - filter window dimensions
+         * anchorX, anchorY - anchor position inside the filter window; the window's top-left sample
+         *                    for output (x, y) is read at (x - anchorX, y - anchorY)
+         *
+         * Coefficients are taken from c_filter2DKernel in row-major order (rows outer, columns inner).
+         */
+        template <class SrcT, typename D>
+        __global__ void filter2D(const SrcT src, PtrStepSz<D> dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY)
+        {
+            typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t; // float accumulator with the same channel count as D
+
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x >= dst.cols || y >= dst.rows) // threads of partial blocks that fall outside dst do nothing
+                return;
+
+            sum_t res = VecTraits<sum_t>::all(0);
+            int kInd = 0; // linear index into c_filter2DKernel
+
+            for (int i = 0; i < kHeight; ++i)
+            {
+                for (int j = 0; j < kWidth; ++j)
+                    res = res + src(y - anchorY + i, x - anchorX + j) * c_filter2DKernel[kInd++]; // kInd advances row by row
+            }
+
+            dst(y, x) = saturate_cast<D>(res); // convert the float sum to the output type via saturate_cast
+        }
+
+        template <typename T, typename D, template <typename> class Brd> struct Filter2DCaller;
+        /* IMPLEMENT_FILTER2D_TEX_READER(type) generates, for one pixel type:
+         *   - a 2D texture reference tex_filter2D_[type], declared with cudaFilterModePoint and
+         *     cudaAddressModeClamp;
+         *   - tex_filter2D_[type]_reader, a functor whose operator()(y, x) fetches the texel at
+         *     (x + xoff, y + yoff);
+         *   - a Filter2DCaller partial specialization for that type whose call() binds srcWhole to the
+         *     texture, wraps the reader in a BorderReader using border policy Brd (constructed from
+         *     dst.rows, dst.cols and borderValue), and launches filter2D with 16x16-thread blocks
+         *     covering dst.
+         * call() checks cudaGetLastError() after the launch and synchronizes the device only when
+         * stream == 0.
+         * NOTE(review): there is a single texture reference per type, so overlapping calls for the same
+         * type would presumably rebind it -- confirm whether callers serialize such calls.
+         */
+        #define IMPLEMENT_FILTER2D_TEX_READER(type) \
+            texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
+            struct tex_filter2D_ ## type ## _reader \
+            { \
+                typedef type elem_type; \
+                typedef int index_type; \
+                const int xoff; \
+                const int yoff; \
+                tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
+                __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
+                { \
+                    return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \
+                } \
+            }; \
+            template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
+            { \
+                static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz<D> dst, \
+                    int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
+                { \
+                    typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
+                    dim3 block(16, 16); \
+                    dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
+                    bindTexture(&tex_filter2D_ ## type , srcWhole); \
+                    tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \
+                    Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
+                    BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \
+                    filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kWidth, kHeight, anchorX, anchorY); \
+                    cudaSafeCall( cudaGetLastError() ); \
+                    if (stream == 0) \
+                        cudaSafeCall( cudaDeviceSynchronize() ); \
+                } \
+            };
+
+        IMPLEMENT_FILTER2D_TEX_READER(uchar);
+        IMPLEMENT_FILTER2D_TEX_READER(uchar4);
+
+        IMPLEMENT_FILTER2D_TEX_READER(ushort);
+        IMPLEMENT_FILTER2D_TEX_READER(ushort4);
+
+        IMPLEMENT_FILTER2D_TEX_READER(float);
+        IMPLEMENT_FILTER2D_TEX_READER(float4);
+
+        #undef IMPLEMENT_FILTER2D_TEX_READER
+        /* Host entry point: copies the filter coefficients into c_filter2DKernel and dispatches to the
+         * Filter2DCaller specialization for source type T, destination type D and the chosen border mode.
+         *
+         * srcWhole, ofsX, ofsY - source image plus an x/y offset that is added to every texture fetch
+         *                        (presumably the ROI origin inside the whole allocation -- confirm with caller)
+         * dst                  - destination image
+         * kWidth, kHeight      - filter dimensions; kWidth * kHeight floats are copied from kernel
+         * anchorX, anchorY     - anchor position inside the filter window
+         * kernel               - coefficients; copied with cudaMemcpyDeviceToDevice, so presumably a
+         *                        device pointer -- TODO confirm against the caller
+         * borderMode           - index into funcs[] below (five entries, 0..4)
+         * borderValue          - forwarded to the border policy constructor
+         * stream               - CUDA stream; 0 selects the synchronous coefficient copy
+         *
+         * NOTE(review): neither borderMode nor kWidth * kHeight is range-checked in this function, while
+         * c_filter2DKernel holds FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE floats; the caller
+         * presumably validates both -- confirm.
+         */
+        template <typename T, typename D>
+        void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
+                          int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
+                          int borderMode, const float* borderValue, cudaStream_t stream)
+        {
+            typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
+            static const func_t funcs[] = // dispatch table indexed by borderMode
+            {
+                Filter2DCaller<T, D, BrdReflect101>::call, // borderMode 0
+                Filter2DCaller<T, D, BrdReplicate>::call, // borderMode 1
+                Filter2DCaller<T, D, BrdConstant>::call, // borderMode 2
+                Filter2DCaller<T, D, BrdReflect>::call, // borderMode 3
+                Filter2DCaller<T, D, BrdWrap>::call // borderMode 4
+            };
+
+            if (stream == 0) // default stream: blocking copy; otherwise enqueue the copy on the given stream
+                cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
+            else
+                cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
+
+            funcs[borderMode](static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
+        }
+        // Explicit instantiations: only same-type source/destination pairs are provided, for uchar, uchar4, ushort, ushort4, float and float4.
+        template void filter2D_gpu<uchar, uchar>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
+        template void filter2D_gpu<uchar4, uchar4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
+        template void filter2D_gpu<ushort, ushort>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
+        template void filter2D_gpu<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
+        template void filter2D_gpu<float, float>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
+        template void filter2D_gpu<float4, float4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
+    }
+}}}
+
+#endif // CUDA_DISABLER
--- a/modules/gpufilters/src/cuda/row_filter.16sc1.cu
+++ b/modules/gpufilters/src/cuda/row_filter.16sc1.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.16sc3.cu
+++ b/modules/gpufilters/src/cuda/row_filter.16sc3.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.16sc4.cu
+++ b/modules/gpufilters/src/cuda/row_filter.16sc4.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.16uc1.cu
+++ b/modules/gpufilters/src/cuda/row_filter.16uc1.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.16uc3.cu
+++ b/modules/gpufilters/src/cuda/row_filter.16uc3.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.16uc4.cu
+++ b/modules/gpufilters/src/cuda/row_filter.16uc4.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.32fc1.cu
+++ b/modules/gpufilters/src/cuda/row_filter.32fc1.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.32fc3.cu
+++ b/modules/gpufilters/src/cuda/row_filter.32fc3.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.32fc4.cu
+++ b/modules/gpufilters/src/cuda/row_filter.32fc4.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.32sc1.cu
+++ b/modules/gpufilters/src/cuda/row_filter.32sc1.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.32sc3.cu
+++ b/modules/gpufilters/src/cuda/row_filter.32sc3.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.32sc4.cu
+++ b/modules/gpufilters/src/cuda/row_filter.32sc4.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.8uc1.cu
+++ b/modules/gpufilters/src/cuda/row_filter.8uc1.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.8uc3.cu
+++ b/modules/gpufilters/src/cuda/row_filter.8uc3.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.8uc4.cu
+++ b/modules/gpufilters/src/cuda/row_filter.8uc4.cu
@ -42,7 +42,7 @@

 #if !defined CUDA_DISABLER

-#include "row_filter.h"
+#include "row_filter.hpp"

 namespace filter
 {
--- a/modules/gpufilters/src/cuda/row_filter.hpp
+++ b/modules/gpufilters/src/cuda/row_filter.hpp
--- a/modules/gpufilters/src/filtering.cpp
+++ b/modules/gpufilters/src/filtering.cpp
@ -45,7 +45,6 @@
 using namespace cv;
 using namespace cv::gpu;

-
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

 Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>&, int, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
@ -628,31 +627,44 @@ void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& ke
 {
    switch( op )
    {
-    case MORPH_ERODE:   erode(src, dst, kernel, buf1, anchor, iterations, stream); break;
-    case MORPH_DILATE: dilate(src, dst, kernel, buf1, anchor, iterations, stream); break;
+    case MORPH_ERODE:
+        erode(src, dst, kernel, buf1, anchor, iterations, stream);
+        break;
+
+    case MORPH_DILATE:
+        dilate(src, dst, kernel, buf1, anchor, iterations, stream);
+        break;
+
    case MORPH_OPEN:
        erode(src, buf2, kernel, buf1, anchor, iterations, stream);
        dilate(buf2, dst, kernel, buf1, anchor, iterations, stream);
        break;
+
    case MORPH_CLOSE:
        dilate(src, buf2, kernel, buf1, anchor, iterations, stream);
        erode(buf2, dst, kernel, buf1, anchor, iterations, stream);
        break;
+
+#ifdef HAVE_OPENCV_GPUARITHM
    case MORPH_GRADIENT:
        erode(src, buf2, kernel, buf1, anchor, iterations, stream);
        dilate(src, dst, kernel, buf1, anchor, iterations, stream);
-        subtract(dst, buf2, dst, GpuMat(), -1, stream);
+        gpu::subtract(dst, buf2, dst, GpuMat(), -1, stream);
        break;
+
    case MORPH_TOPHAT:
        erode(src, dst, kernel, buf1, anchor, iterations, stream);
        dilate(dst, buf2, kernel, buf1, anchor, iterations, stream);
-        subtract(src, buf2, dst, GpuMat(), -1, stream);
+        gpu::subtract(src, buf2, dst, GpuMat(), -1, stream);
        break;
+
    case MORPH_BLACKHAT:
        dilate(src, dst, kernel, buf1, anchor, iterations, stream);
        erode(dst, buf2, kernel, buf1, anchor, iterations, stream);
-        subtract(buf2, src, dst, GpuMat(), -1, stream);
+        gpu::subtract(buf2, src, dst, GpuMat(), -1, stream);
        break;
+#endif
+
    default:
        CV_Error(cv::Error::StsBadArg, "unknown morphological operation");
    }
--- a/modules/gpufilters/src/precomp.cpp
+++ b/modules/gpufilters/src/precomp.cpp
@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
--- a/modules/gpufilters/src/precomp.hpp
+++ b/modules/gpufilters/src/precomp.hpp
@ -0,0 +1,59 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#include <limits>
+
+#include "opencv2/gpufilters.hpp"
+#include "opencv2/imgproc.hpp"
+
+#include "opencv2/core/gpu_private.hpp"
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifdef HAVE_OPENCV_GPUARITHM
+#  include "opencv2/gpuarithm.hpp"
+#endif
+
+#endif /* __OPENCV_PRECOMP_H__ */
--- a/modules/gpufilters/test/test_filters.cpp
+++ b/modules/gpufilters/test/test_filters.cpp
@ -105,7 +105,7 @@ GPU_TEST_P(Blur, Accuracy)
    EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), 1.0);
 }

-INSTANTIATE_TEST_CASE_P(GPU_Filter, Blur, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Blur, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
@ -164,7 +164,7 @@ GPU_TEST_P(Sobel, Accuracy)
    EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.1);
 }

-INSTANTIATE_TEST_CASE_P(GPU_Filter, Sobel, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Sobel, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)),
@ -227,7 +227,7 @@ GPU_TEST_P(Scharr, Accuracy)
    EXPECT_MAT_NEAR(getInnerROI(dst_gold, cv::Size(3, 3)), getInnerROI(dst, cv::Size(3, 3)), CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.1);
 }

-INSTANTIATE_TEST_CASE_P(GPU_Filter, Scharr, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Scharr, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)),
@ -301,7 +301,7 @@ GPU_TEST_P(GaussianBlur, Accuracy)
    }
 }

-INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, GaussianBlur, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)),
@ -363,7 +363,7 @@ GPU_TEST_P(Laplacian, Accuracy)
    EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 0.0 : 1e-3);
 }

-INSTANTIATE_TEST_CASE_P(GPU_Filter, Laplacian, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Laplacian, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
@ -411,7 +411,7 @@ GPU_TEST_P(Erode, Accuracy)
    EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), 0.0);
 }

-INSTANTIATE_TEST_CASE_P(GPU_Filter, Erode, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Erode, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
@ -460,7 +460,7 @@ GPU_TEST_P(Dilate, Accuracy)
    EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), 0.0);
 }

-INSTANTIATE_TEST_CASE_P(GPU_Filter, Dilate, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Dilate, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
@ -513,7 +513,7 @@ GPU_TEST_P(MorphEx, Accuracy)
    EXPECT_MAT_NEAR(getInnerROI(dst_gold, border), getInnerROI(dst, border), 0.0);
 }

-INSTANTIATE_TEST_CASE_P(GPU_Filter, MorphEx, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, MorphEx, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
@ -565,7 +565,7 @@ GPU_TEST_P(Filter2D, Accuracy)
    EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1.0);
 }

-INSTANTIATE_TEST_CASE_P(GPU_Filter, Filter2D, testing::Combine(
+INSTANTIATE_TEST_CASE_P(GPU_Filters, Filter2D, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
--- a/modules/gpufilters/test/test_main.cpp
+++ b/modules/gpufilters/test/test_main.cpp
@ -0,0 +1,120 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+using namespace std;
+using namespace cv;
+using namespace cv::gpu;
+using namespace cvtest;
+using namespace testing;
+
+/**
+ * Entry point of the test executable when CUDA support is compiled in.
+ *
+ * Command-line options handled here:
+ *   - "help":   print the usage message and exit with 0;
+ *   - "info":   exit with 0 right after the CUDA information has been printed;
+ *   - "device": index of the device to load; a negative value loads all devices.
+ *
+ * Returns the value of RUN_ALL_TESTS(), 0 for the help/info shortcuts,
+ * or -1 when an exception is caught.
+ */
+int main(int argc, char** argv)
+{
+    try
+    {
+        const std::string optionSpec =
+                "{ h help ?            |      | Print help}"
+                "{ i info              |      | Print information about system and exit }"
+                "{ device              | -1   | Device on which tests will be executed (-1 means all devices) }"
+                ;
+
+        CommandLineParser parser(argc, const_cast<const char**>(argv), optionSpec);
+
+        if (parser.has("help"))
+        {
+            parser.printMessage();
+            return 0;
+        }
+
+        // Printed before the "info" check so that "info" only has to exit afterwards.
+        printCudaInfo();
+
+        if (parser.has("info"))
+            return 0;
+
+        const int deviceId = parser.get<int>("device");
+        if (deviceId >= 0)
+        {
+            // A specific device was requested: load only that one and report its name.
+            DeviceManager::instance().load(deviceId);
+
+            DeviceInfo selected(deviceId);
+            std::cout << "Run tests on device " << deviceId << " [" << selected.name() << "] \n" << std::endl;
+        }
+        else
+        {
+            // Negative index: load every device the manager accepts.
+            DeviceManager::instance().loadAll();
+
+            std::cout << "Run tests on all supported devices \n" << std::endl;
+        }
+
+        TS::ptr()->init("gpu");
+        InitGoogleTest(&argc, argv);
+
+        return RUN_ALL_TESTS();
+    }
+    catch (const std::exception& ex)
+    {
+        std::cerr << ex.what() << std::endl;
+        return -1;
+    }
+    catch (...)
+    {
+        std::cerr << "Unknown error" << std::endl;
+        return -1;
+    }
+
+    // Not reached: every path through the try block and both handlers returns.
+    return 0;
+}
+
+#else // HAVE_CUDA
+
+// Fallback entry point for builds without CUDA: report that fact and exit successfully.
+int main()
+{
+    fputs("OpenCV was built without CUDA support\n", stdout);
+    return 0;
+}
+
+#endif // HAVE_CUDA
--- a/modules/gpufilters/test/test_precomp.cpp
+++ b/modules/gpufilters/test/test_precomp.cpp
@ -0,0 +1,43 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
--- a/modules/gpufilters/test/test_precomp.hpp
+++ b/modules/gpufilters/test/test_precomp.hpp
@ -0,0 +1,60 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-declarations"
+#  if defined __clang__ || defined __APPLE__
+#    pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#    pragma GCC diagnostic ignored "-Wextra"
+#  endif
+#endif
+
+#ifndef __OPENCV_TEST_PRECOMP_HPP__
+#define __OPENCV_TEST_PRECOMP_HPP__
+
+#include "opencv2/ts.hpp"
+#include "opencv2/ts/gpu_test.hpp"
+
+#include "opencv2/gpufilters.hpp"
+#include "opencv2/imgproc.hpp"
+
+#endif
--- a/modules/stitching/CMakeLists.txt
+++ b/modules/stitching/CMakeLists.txt
@ -1,3 +1,3 @@
 set(the_description "Images stitching")
-ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect OPTIONAL opencv_gpu opencv_gpuarithm opencv_nonfree)
+ocv_define_module(stitching opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect OPTIONAL opencv_gpu opencv_gpuarithm opencv_gpufilters opencv_nonfree)

--- a/modules/superres/CMakeLists.txt
+++ b/modules/superres/CMakeLists.txt
@ -4,4 +4,4 @@ endif()

 set(the_description "Super Resolution")
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef)
-ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_highgui opencv_gpu opencv_gpucodec opencv_gpuarithm)
+ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_highgui opencv_gpu opencv_gpuarithm opencv_gpufilters opencv_gpucodec)
--- a/samples/cpp/CMakeLists.txt
+++ b/samples/cpp/CMakeLists.txt
@ -18,6 +18,7 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)

  if(HAVE_opencv_gpu)
    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpuarithm/include")
+    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpufilters/include")
    ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
  endif()

--- a/samples/gpu/CMakeLists.txt
+++ b/samples/gpu/CMakeLists.txt
@ -2,7 +2,7 @@ SET(OPENCV_GPU_SAMPLES_REQUIRED_DEPS opencv_core opencv_flann opencv_imgproc ope
                                     opencv_ml opencv_video opencv_objdetect opencv_features2d
                                     opencv_calib3d opencv_legacy opencv_contrib opencv_gpu
                                     opencv_nonfree opencv_softcascade opencv_superres
-                                     opencv_gpucodec opencv_gpuarithm)
+                                     opencv_gpucodec opencv_gpuarithm opencv_gpufilters)

 ocv_check_dependencies(${OPENCV_GPU_SAMPLES_REQUIRED_DEPS})