diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 7c1ebb786a..9cd0709995 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -124,6 +124,8 @@ namespace cv
             // Checks whether the GPU module can be run on the given device
             bool isCompatible() const;
 
+            int deviceID() const { return device_id_; } // Returns the value stored in device_id_
+
         private:
             void query();
             void queryMemory(size_t& free_memory, size_t& total_memory) const;
@@ -517,14 +519,14 @@ namespace cv
         //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
         CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null());
         //! multiplies matrix to a scalar (c = a * s)
-        //! supports CV_32FC1 and CV_32FC2 type
+        //! supports CV_32FC1 type
         CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c, Stream& stream = Stream::Null());
 
         //! computes element-wise quotient of the two arrays (c = a / b)
         //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
         CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null());
         //! computes element-wise quotient of matrix and scalar (c = a / s)
-        //! supports CV_32FC1 and CV_32FC2 type
+        //! supports CV_32FC1 type
         CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c, Stream& stream = Stream::Null());
 
         //! computes exponent of each matrix element (b = e**a)
@@ -1412,9 +1414,9 @@ namespace cv
             void radiusMatch(const GpuMat& queryDescs, std::vector< std::vector<DMatch> >& matches, float maxDistance,
                 const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
 
-        private:
             DistType distType;
 
+        private:
             std::vector<GpuMat> trainDescCollection;
         };
 
diff --git a/modules/gpu/src/calib3d.cpp b/modules/gpu/src/calib3d.cpp
index 84db041148..301ea8167e 100644
--- a/modules/gpu/src/calib3d.cpp
+++ b/modules/gpu/src/calib3d.cpp
@@ -1,290 +1,290 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-
-#if !defined(HAVE_CUDA)
-
-void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
-
-void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
-
-void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat&, Mat&, bool, int, float, int, vector<int>*) { throw_nogpu(); }
-
-#else
-
-using namespace cv;
-using namespace cv::gpu;
-
-namespace cv { namespace gpu { namespace transform_points 
-{
-    void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
-}}}
-
-namespace
-{
-    void transformPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, cudaStream_t stream)
-    {
-        CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
-        CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
-        CV_Assert(tvec.size() == Size(3, 1) && tvec.type() == CV_32F);
-
-        // Convert rotation vector into matrix
-        Mat rot;
-        Rodrigues(rvec, rot);
-
-        dst.create(src.size(), src.type());
-        transform_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), dst, stream);
-    }
-}
-
-void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
-{
-    ::transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
-}
-
-namespace cv { namespace gpu { namespace project_points 
-{
-    void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
-}}}
-
-
-namespace
-{
-    void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, cudaStream_t stream)
-    {
-        CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
-        CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
-        CV_Assert(tvec.size() == Size(3, 1) && tvec.type() == CV_32F);
-        CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
-        CV_Assert(dist_coef.empty()); // Undistortion isn't supported
-
-        // Convert rotation vector into matrix
-        Mat rot;
-        Rodrigues(rvec, rot);
-
-        dst.create(src.size(), CV_32FC2);
-        project_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), camera_mat.ptr<float>(), dst,stream);
-    }
-}
-
-void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
-{
-    ::projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
-}
-
-
-namespace cv { namespace gpu { namespace solve_pnp_ransac
-{
-    int maxNumIters();
-
-    void computeHypothesisScores(
-            const int num_hypotheses, const int num_points, const float* rot_matrices,
-            const float3* transl_vectors, const float3* object, const float2* image,
-            const float dist_threshold, int* hypothesis_scores);
-}}}
-
-namespace
-{
-    // Selects subset_size random different points from [0, num_points - 1] range
-    void selectRandom(int subset_size, int num_points, vector<int>& subset)
-    {
-        subset.resize(subset_size);
-        for (int i = 0; i < subset_size; ++i)
-        {
-            bool was;
-            do
-            {
-                subset[i] = rand() % num_points;
-                was = false;
-                for (int j = 0; j < i; ++j)
-                    if (subset[j] == subset[i])
-                    {
-                        was = true;
-                        break;
-                    }
-            } while (was);
-        }
-    }
-
-    // Computes rotation, translation pair for small subsets if the input data
-    class TransformHypothesesGenerator
-    {
-    public:
-        TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_, 
-                                     const Mat& camera_mat_, int num_points_, int subset_size_, 
-                                     Mat rot_matrices_, Mat transl_vectors_)
-                : object(&object_), image(&image_), dist_coef(&dist_coef_), camera_mat(&camera_mat_), 
-                  num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_), 
-                  transl_vectors(transl_vectors_) {}
-
-        void operator()(const BlockedRange& range) const
-        {
-            // Input data for generation of the current hypothesis
-            vector<int> subset_indices(subset_size);
-            Mat_<Point3f> object_subset(1, subset_size);
-            Mat_<Point2f> image_subset(1, subset_size);
-
-            // Current hypothesis data
-            Mat rot_vec(1, 3, CV_64F);
-            Mat rot_mat(3, 3, CV_64F);
-            Mat transl_vec(1, 3, CV_64F);
-
-            for (int iter = range.begin(); iter < range.end(); ++iter)
-            {
-                selectRandom(subset_size, num_points, subset_indices);
-                for (int i = 0; i < subset_size; ++i)
-                {
-                   object_subset(0, i) = object->at<Point3f>(subset_indices[i]);
-                   image_subset(0, i) = image->at<Point2f>(subset_indices[i]);
-                }
-
-                solvePnP(object_subset, image_subset, *camera_mat, *dist_coef, rot_vec, transl_vec);
-
-                // Remember translation vector
-                Mat transl_vec_ = transl_vectors.colRange(iter * 3, (iter + 1) * 3);
-                transl_vec = transl_vec.reshape(0, 1);
-                transl_vec.convertTo(transl_vec_, CV_32F);
-
-                // Remember rotation matrix
-                Rodrigues(rot_vec, rot_mat);
-                Mat rot_mat_ = rot_matrices.colRange(iter * 9, (iter + 1) * 9).reshape(0, 3);
-                rot_mat.convertTo(rot_mat_, CV_32F);
-            }
-        }
-
-        const Mat* object;
-        const Mat* image;
-        const Mat* dist_coef;
-        const Mat* camera_mat;
-        int num_points;
-        int subset_size;
-
-        // Hypotheses storage (global)
-        Mat rot_matrices;
-        Mat transl_vectors;
-    };
-}
-
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+#if !defined(HAVE_CUDA)
+
+void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
+
+void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
+
+void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat&, Mat&, bool, int, float, int, vector<int>*) { throw_nogpu(); }
+
+#else
+
+using namespace cv;
+using namespace cv::gpu;
+
+namespace cv { namespace gpu { namespace transform_points 
+{
+    void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
+}}}
+
+namespace
+{
+    void transformPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, cudaStream_t stream)
+    {
+        CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
+        CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
+        CV_Assert(tvec.size() == Size(3, 1) && tvec.type() == CV_32F);
+
+        // Convert rotation vector into matrix
+        Mat rot;
+        Rodrigues(rvec, rot);
+
+        dst.create(src.size(), src.type());
+        transform_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), dst, stream);
+    }
+}
+
+void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
+{
+    ::transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
+}
+
+namespace cv { namespace gpu { namespace project_points 
+{
+    void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
+}}}
+
+
+namespace
+{
+    void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, cudaStream_t stream)
+    {
+        CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
+        CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
+        CV_Assert(tvec.size() == Size(3, 1) && tvec.type() == CV_32F);
+        CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
+        CV_Assert(dist_coef.empty()); // Undistortion isn't supported
+
+        // Convert rotation vector into matrix
+        Mat rot;
+        Rodrigues(rvec, rot);
+
+        dst.create(src.size(), CV_32FC2);
+        project_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), camera_mat.ptr<float>(), dst,stream);
+    }
+}
+
+void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
+{
+    ::projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
+}
+
+
+namespace cv { namespace gpu { namespace solve_pnp_ransac
+{
+    int maxNumIters();
+
+    void computeHypothesisScores(
+            const int num_hypotheses, const int num_points, const float* rot_matrices,
+            const float3* transl_vectors, const float3* object, const float2* image,
+            const float dist_threshold, int* hypothesis_scores);
+}}}
+
+namespace
+{
+    // Selects subset_size random different points from [0, num_points - 1] range
+    void selectRandom(int subset_size, int num_points, vector<int>& subset)
+    {
+        subset.resize(subset_size);
+        for (int i = 0; i < subset_size; ++i)
+        {
+            bool was;
+            do
+            {
+                subset[i] = rand() % num_points;
+                was = false;
+                for (int j = 0; j < i; ++j)
+                    if (subset[j] == subset[i])
+                    {
+                        was = true;
+                        break;
+                    }
+            } while (was);
+        }
+    }
+
+    // Computes rotation, translation pair for small subsets of the input data
+    class TransformHypothesesGenerator
+    {
+    public:
+        TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_, 
+                                     const Mat& camera_mat_, int num_points_, int subset_size_, 
+                                     Mat rot_matrices_, Mat transl_vectors_)
+                : object(&object_), image(&image_), dist_coef(&dist_coef_), camera_mat(&camera_mat_), 
+                  num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_), 
+                  transl_vectors(transl_vectors_) {}
+
+        void operator()(const BlockedRange& range) const
+        {
+            // Input data for generation of the current hypothesis
+            vector<int> subset_indices(subset_size);
+            Mat_<Point3f> object_subset(1, subset_size);
+            Mat_<Point2f> image_subset(1, subset_size);
+
+            // Current hypothesis data
+            Mat rot_vec(1, 3, CV_64F);
+            Mat rot_mat(3, 3, CV_64F);
+            Mat transl_vec(1, 3, CV_64F);
+
+            for (int iter = range.begin(); iter < range.end(); ++iter)
+            {
+                selectRandom(subset_size, num_points, subset_indices);
+                for (int i = 0; i < subset_size; ++i)
+                {
+                   object_subset(0, i) = object->at<Point3f>(subset_indices[i]);
+                   image_subset(0, i) = image->at<Point2f>(subset_indices[i]);
+                }
+
+                solvePnP(object_subset, image_subset, *camera_mat, *dist_coef, rot_vec, transl_vec);
+
+                // Remember translation vector
+                Mat transl_vec_ = transl_vectors.colRange(iter * 3, (iter + 1) * 3);
+                transl_vec = transl_vec.reshape(0, 1);
+                transl_vec.convertTo(transl_vec_, CV_32F);
+
+                // Remember rotation matrix
+                Rodrigues(rot_vec, rot_mat);
+                Mat rot_mat_ = rot_matrices.colRange(iter * 9, (iter + 1) * 9).reshape(0, 3);
+                rot_mat.convertTo(rot_mat_, CV_32F);
+            }
+        }
+
+        const Mat* object;
+        const Mat* image;
+        const Mat* dist_coef;
+        const Mat* camera_mat;
+        int num_points;
+        int subset_size;
+
+        // Hypotheses storage (global)
+        Mat rot_matrices;
+        Mat transl_vectors;
+    };
+}
+
 void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
                              const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess,
                              int num_iters, float max_dist, int min_inlier_count, 
-                             vector<int>* inliers)
-{
-    CV_Assert(object.rows == 1 && object.cols > 0 && object.type() == CV_32FC3);
-    CV_Assert(image.rows == 1 && image.cols > 0 && image.type() == CV_32FC2);
-    CV_Assert(object.cols == image.cols);
-    CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
-    CV_Assert(!use_extrinsic_guess); // We don't support initial guess for now
-    CV_Assert(num_iters <= solve_pnp_ransac::maxNumIters());
-
-    const int subset_size = 4;
-    const int num_points = object.cols;
-    CV_Assert(num_points >= subset_size);
-
-    // Unapply distortion and intrinsic camera transformations
-    Mat eye_camera_mat = Mat::eye(3, 3, CV_32F);
-    Mat empty_dist_coef;
-    Mat image_normalized;
-    undistortPoints(image, image_normalized, camera_mat, dist_coef, Mat(), eye_camera_mat);
-
-    // Hypotheses storage (global)
-    Mat rot_matrices(1, num_iters * 9, CV_32F);
-    Mat transl_vectors(1, num_iters * 3, CV_32F);
-
-    // Generate set of hypotheses using small subsets of the input data
-    TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat, 
-                                      num_points, subset_size, rot_matrices, transl_vectors);
-    parallel_for(BlockedRange(0, num_iters), body);
-
-    // Compute scores (i.e. number of inliers) for each hypothesis
-    GpuMat d_object(object);
-    GpuMat d_image_normalized(image_normalized);
-    GpuMat d_hypothesis_scores(1, num_iters, CV_32S);
-    solve_pnp_ransac::computeHypothesisScores(
-            num_iters, num_points, rot_matrices.ptr<float>(), transl_vectors.ptr<float3>(),
-            d_object.ptr<float3>(), d_image_normalized.ptr<float2>(), max_dist * max_dist, 
-            d_hypothesis_scores.ptr<int>());
-
-    // Find the best hypothesis index
-    Point best_idx;
-    double best_score;
-    minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
-    int num_inliers = static_cast<int>(best_score);
-
-    // Extract the best hypothesis data
-
-    Mat rot_mat = rot_matrices.colRange(best_idx.x * 9, (best_idx.x + 1) * 9).reshape(0, 3);
-    Rodrigues(rot_mat, rvec);
-    rvec = rvec.reshape(0, 1);
-
-    tvec = transl_vectors.colRange(best_idx.x * 3, (best_idx.x + 1) * 3).clone();
-    tvec = tvec.reshape(0, 1);
-
-    // Build vector of inlier indices
-    if (inliers != NULL)
-    {
-        inliers->clear();
-        inliers->reserve(num_inliers);
-
-        Point3f p, p_transf;
-        Point2f p_proj;
-        const float* rot = rot_mat.ptr<float>();
-        const float* transl = tvec.ptr<float>();
-
-        for (int i = 0; i < num_points; ++i)
-        {
-            p = object.at<Point3f>(0, i);
-            p_transf.x = rot[0] * p.x + rot[1] * p.y + rot[2] * p.z + transl[0];
-            p_transf.y = rot[3] * p.x + rot[4] * p.y + rot[5] * p.z + transl[1];
-            p_transf.z = rot[6] * p.x + rot[7] * p.y + rot[8] * p.z + transl[2];
-            p_proj.x = p_transf.x / p_transf.z;
-            p_proj.y = p_transf.y / p_transf.z;
-            if (norm(p_proj - image_normalized.at<Point2f>(0, i)) < max_dist)
-                inliers->push_back(i);
-        }
-    }
-}
-
-#endif
-
-
+                             vector<int>* inliers)
+{
+    CV_Assert(object.rows == 1 && object.cols > 0 && object.type() == CV_32FC3);
+    CV_Assert(image.rows == 1 && image.cols > 0 && image.type() == CV_32FC2);
+    CV_Assert(object.cols == image.cols);
+    CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
+    CV_Assert(!use_extrinsic_guess); // We don't support initial guess for now
+    CV_Assert(num_iters <= solve_pnp_ransac::maxNumIters());
+
+    const int subset_size = 4;
+    const int num_points = object.cols;
+    CV_Assert(num_points >= subset_size);
+
+    // Unapply distortion and intrinsic camera transformations
+    Mat eye_camera_mat = Mat::eye(3, 3, CV_32F);
+    Mat empty_dist_coef;
+    Mat image_normalized;
+    undistortPoints(image, image_normalized, camera_mat, dist_coef, Mat(), eye_camera_mat);
+
+    // Hypotheses storage (global)
+    Mat rot_matrices(1, num_iters * 9, CV_32F);
+    Mat transl_vectors(1, num_iters * 3, CV_32F);
+
+    // Generate set of hypotheses using small subsets of the input data
+    TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat, 
+                                      num_points, subset_size, rot_matrices, transl_vectors);
+    parallel_for(BlockedRange(0, num_iters), body);
+
+    // Compute scores (i.e. number of inliers) for each hypothesis
+    GpuMat d_object(object);
+    GpuMat d_image_normalized(image_normalized);
+    GpuMat d_hypothesis_scores(1, num_iters, CV_32S);
+    solve_pnp_ransac::computeHypothesisScores(
+            num_iters, num_points, rot_matrices.ptr<float>(), transl_vectors.ptr<float3>(),
+            d_object.ptr<float3>(), d_image_normalized.ptr<float2>(), max_dist * max_dist, 
+            d_hypothesis_scores.ptr<int>());
+
+    // Find the best hypothesis index
+    Point best_idx;
+    double best_score;
+    minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
+    int num_inliers = static_cast<int>(best_score);
+
+    // Extract the best hypothesis data
+
+    Mat rot_mat = rot_matrices.colRange(best_idx.x * 9, (best_idx.x + 1) * 9).reshape(0, 3);
+    Rodrigues(rot_mat, rvec);
+    rvec = rvec.reshape(0, 1);
+
+    tvec = transl_vectors.colRange(best_idx.x * 3, (best_idx.x + 1) * 3).clone();
+    tvec = tvec.reshape(0, 1);
+
+    // Build vector of inlier indices
+    if (inliers != NULL)
+    {
+        inliers->clear();
+        inliers->reserve(num_inliers);
+
+        Point3f p, p_transf;
+        Point2f p_proj;
+        const float* rot = rot_mat.ptr<float>();
+        const float* transl = tvec.ptr<float>();
+
+        for (int i = 0; i < num_points; ++i)
+        {
+            p = object.at<Point3f>(0, i);
+            p_transf.x = rot[0] * p.x + rot[1] * p.y + rot[2] * p.z + transl[0];
+            p_transf.y = rot[3] * p.x + rot[4] * p.y + rot[5] * p.z + transl[1];
+            p_transf.z = rot[6] * p.x + rot[7] * p.y + rot[8] * p.z + transl[2];
+            p_proj.x = p_transf.x / p_transf.z;
+            p_proj.y = p_transf.y / p_transf.z;
+            if (norm(p_proj - image_normalized.at<Point2f>(0, i)) < max_dist)
+                inliers->push_back(i);
+        }
+    }
+}
+
+#endif
+
+
diff --git a/modules/gpu/src/cuda/brute_force_matcher.cu b/modules/gpu/src/cuda/brute_force_matcher.cu
index fa06589f3a..27b8e530a4 100644
--- a/modules/gpu/src/cuda/brute_force_matcher.cu
+++ b/modules/gpu/src/cuda/brute_force_matcher.cu
@@ -646,9 +646,9 @@ namespace cv { namespace gpu { namespace bfmatcher
             matchCached_caller<16, 16, 64, true, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance, stream);
         else if (queryDescs.cols < 128)
             matchCached_caller<16, 16, 128, false, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance, stream);
-        else if (queryDescs.cols == 128)
+        else if (queryDescs.cols == 128 && cc_12)
             matchCached_caller<16, 16, 128, true, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance, stream);
-        else if (queryDescs.cols < 256)
+        else if (queryDescs.cols < 256 && cc_12)
             matchCached_caller<16, 16, 256, false, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance, stream);
         else if (queryDescs.cols == 256 && cc_12)
             matchCached_caller<16, 16, 256, true, Dist>(queryDescs, train, mask, trainIdx, imgIdx, distance, stream);
diff --git a/modules/gpu/src/cuda/mathfunc.cu b/modules/gpu/src/cuda/mathfunc.cu
index bed7a0421a..f68562587d 100644
--- a/modules/gpu/src/cuda/mathfunc.cu
+++ b/modules/gpu/src/cuda/mathfunc.cu
@@ -82,7 +82,9 @@ namespace cv { namespace gpu { namespace mathfunc
     {
         static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
         {
-            dst[y * dst_step + x] = scale * atan2f(y_data, x_data);
+            float angle = atan2f(y_data, x_data);        // atan2f may return a negative angle
+            angle += (angle < 0) * 2.0f * (float)CV_PI;  // add 2*pi to negative angles; float-only math avoids double promotion in device code
+            dst[y * dst_step + x] = scale * angle;       // scale is applied after the angle has been made non-negative
         }
     };
     template <typename Mag, typename Angle>
diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp
index 7123a83ff7..3fdba4a48d 100644
--- a/modules/gpu/src/element_operations.cpp
+++ b/modules/gpu/src/element_operations.cpp
@@ -211,22 +211,42 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream&
 
 void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
 {
-    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
-    static const caller_t callers[] = {0, NppArithmScalar<1, nppiMulC_32f_C1R>::calc, NppArithmScalar<2, nppiMulC_32fc_C1R>::calc};
+    CV_Assert(src.type() == CV_32FC1); // only CV_32FC1 input is accepted by this overload
 
-    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
+    dst.create(src.size(), src.type()); // dst gets the same size and type as src
 
-    callers[src.channels()](src, sc, dst, StreamAccessor::getStream(stream));
+    NppiSize sz; // region of interest handed to NPP: the whole of src
+    sz.width  = src.cols;
+    sz.height = src.rows;
+
+    cudaStream_t cudaStream = StreamAccessor::getStream(stream);
+
+    NppStreamHandler h(cudaStream); // NOTE(review): presumably binds NPP to cudaStream while h is alive - confirm against NppStreamHandler
+
+    nppSafeCall( nppiMulC_32f_C1R(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) ); // only sc[0] is used as the multiplier
+
+    if (cudaStream == 0) // stream handle 0: synchronize the device before returning
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
 {
-    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
-    static const caller_t callers[] = {0, NppArithmScalar<1, nppiDivC_32f_C1R>::calc, NppArithmScalar<2, nppiDivC_32fc_C1R>::calc};
+    CV_Assert(src.type() == CV_32FC1); // only CV_32FC1 input is accepted by this overload
 
-    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
+    dst.create(src.size(), src.type()); // dst gets the same size and type as src
 
-    callers[src.channels()](src, sc, dst, StreamAccessor::getStream(stream));
+    NppiSize sz; // region of interest handed to NPP: the whole of src
+    sz.width  = src.cols;
+    sz.height = src.rows;
+
+    cudaStream_t cudaStream = StreamAccessor::getStream(stream);
+
+    NppStreamHandler h(cudaStream); // NOTE(review): presumably binds NPP to cudaStream while h is alive - confirm against NppStreamHandler
+
+    nppSafeCall( nppiDivC_32f_C1R(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) ); // only sc[0] is used as the divisor
+
+    if (cudaStream == 0) // stream handle 0: synchronize the device before returning
+        cudaSafeCall( cudaDeviceSynchronize() );
 }
 
 
diff --git a/modules/gpu/test/nvidia/NCVAutoTestLister.hpp b/modules/gpu/test/nvidia/NCVAutoTestLister.hpp
index d8106efc74..bdaa8fac67 100644
--- a/modules/gpu/test/nvidia/NCVAutoTestLister.hpp
+++ b/modules/gpu/test/nvidia/NCVAutoTestLister.hpp
@@ -15,15 +15,22 @@
 
 #include "NCVTest.hpp"
 
+enum OutputLevel // verbosity selector consumed by NCVAutoTestLister
+{
+    OutputLevelNone,    // suppresses the suite summary as well as the compact and full per-test output
+    OutputLevelCompact, // suite header, then one '.', 'x' or 'm' character per test, then the summary
+    OutputLevelFull     // one detailed printf line per test, then the summary
+};
+
 class NCVAutoTestLister
 {
 public:
 
-    NCVAutoTestLister(std::string testSuiteName, NcvBool bStopOnFirstFail=false, NcvBool bCompactOutput=true)
+    NCVAutoTestLister(std::string testSuiteName, OutputLevel outputLevel = OutputLevelCompact, NcvBool bStopOnFirstFail=false)
         :
     testSuiteName(testSuiteName),
-    bStopOnFirstFail(bStopOnFirstFail),
-    bCompactOutput(bCompactOutput)
+    outputLevel(outputLevel), // initializers follow the member declaration order: testSuiteName, outputLevel, bStopOnFirstFail
+    bStopOnFirstFail(bStopOnFirstFail)
     {
     }
 
@@ -38,7 +45,7 @@ public:
         Ncv32u nFailed = 0;
         Ncv32u nFailedMem = 0;
 
-        if (bCompactOutput)
+        if (outputLevel == OutputLevelCompact)
         {
             printf("Test suite '%s' with %d tests\n", 
                 testSuiteName.c_str(),
@@ -52,7 +59,7 @@ public:
             NCVTestReport curReport;
             bool res = curTest.executeTest(curReport);
 
-            if (!bCompactOutput)
+            if (outputLevel == OutputLevelFull)
             {
                 printf("Test %3i %16s; Consumed mem GPU = %8d, CPU = %8d; %s\n",
                     i,
@@ -65,7 +72,7 @@ public:
             if (res)
             {
                 nPassed++;
-                if (bCompactOutput)
+                if (outputLevel == OutputLevelCompact)
                 {
                     printf(".");
                 }
@@ -75,7 +82,7 @@ public:
                 if (!curReport.statsText["rcode"].compare("FAILED"))
                 {
                     nFailed++;
-                    if (bCompactOutput)
+                    if (outputLevel == OutputLevelCompact)
                     {
                         printf("x");
                     }
@@ -87,7 +94,7 @@ public:
                 else
                 {
                     nFailedMem++;
-                    if (bCompactOutput)
+                    if (outputLevel == OutputLevelCompact)
                     {
                         printf("m");
                     }
@@ -95,17 +102,20 @@ public:
             }
             fflush(stdout);
         }
-        if (bCompactOutput)
+        if (outputLevel == OutputLevelCompact)
         {
             printf("\n");
         }
 
-        printf("Test suite '%s' complete: %d total, %d passed, %d memory errors, %d failed\n\n", 
-            testSuiteName.c_str(),
-            (int)(this->tests.size()),
-            nPassed,
-            nFailedMem,
-            nFailed);
+        if (outputLevel != OutputLevelNone)
+        {
+            printf("Test suite '%s' complete: %d total, %d passed, %d memory errors, %d failed\n\n", 
+                testSuiteName.c_str(),
+                (int)(this->tests.size()),
+                nPassed,
+                nFailedMem,
+                nFailed);
+        }
 
         bool passed = nFailed == 0 && nFailedMem == 0;
         return passed;
@@ -121,9 +131,9 @@ public:
 
 private:
 
-    NcvBool bStopOnFirstFail;
-    NcvBool bCompactOutput;
     std::string testSuiteName;
+    OutputLevel outputLevel;
+    NcvBool bStopOnFirstFail;
     std::vector<INCVTest *> tests;
 };
 
diff --git a/modules/gpu/test/nvidia/main_nvidia.cpp b/modules/gpu/test/nvidia/main_nvidia.cpp
index 31f0cb1121..994737eee4 100644
--- a/modules/gpu/test/nvidia/main_nvidia.cpp
+++ b/modules/gpu/test/nvidia/main_nvidia.cpp
@@ -288,70 +288,162 @@ static void devNullOutput(const char *msg)
 
 }
 
+bool nvidia_NPPST_Integral_Image(const std::string& test_data_path, OutputLevel outputLevel)
+{
+	path = test_data_path;
+    ncvSetDebugOutputHandler(devNullOutput);
 
-bool main_nvidia(const std::string& test_data_path)
+    NCVAutoTestLister testListerII("NPPST Integral Image", outputLevel);
+    
+    NCVTestSourceProvider<Ncv8u> testSrcRandom_8u(2010, 0, 255, 4096, 4096);
+    NCVTestSourceProvider<Ncv32f> testSrcRandom_32f(2010, -1.0f, 1.0f, 4096, 4096);
+
+    generateIntegralTests<Ncv8u, Ncv32u>(testListerII, testSrcRandom_8u, 4096, 4096);
+    generateIntegralTests<Ncv32f, Ncv32f>(testListerII, testSrcRandom_32f, 4096, 4096);
+
+    return testListerII.invoke();
+}
+
+bool nvidia_NPPST_Squared_Integral_Image(const std::string& test_data_path, OutputLevel outputLevel)
 {
 	path = test_data_path;
+    ncvSetDebugOutputHandler(devNullOutput);
 
-    printf("Testing NVIDIA Computer Vision SDK\n");
-    printf("==================================\n");
+    NCVAutoTestLister testListerSII("NPPST Squared Integral Image", outputLevel);
 
+    NCVTestSourceProvider<Ncv8u> testSrcRandom_8u(2010, 0, 255, 4096, 4096);
+
+    generateSquaredIntegralTests(testListerSII, testSrcRandom_8u, 4096, 4096);
+
+    return testListerSII.invoke();
+}
+
+bool nvidia_NPPST_RectStdDev(const std::string& test_data_path, OutputLevel outputLevel)
+{
+	path = test_data_path;
     ncvSetDebugOutputHandler(devNullOutput);
+    
+    NCVAutoTestLister testListerRStdDev("NPPST RectStdDev", outputLevel);
+    
+    NCVTestSourceProvider<Ncv8u> testSrcRandom_8u(2010, 0, 255, 4096, 4096);
 
-    NCVAutoTestLister testListerII("NPPST Integral Image"                   );//,,true, false);
-    NCVAutoTestLister testListerSII("NPPST Squared Integral Image"          );//,,true, false);
-    NCVAutoTestLister testListerRStdDev("NPPST RectStdDev"                  );//,,true, false);
-    NCVAutoTestLister testListerResize("NPPST Resize"                       );//,,true, false);
-    NCVAutoTestLister testListerNPPSTVectorOperations("NPPST Vector Operations"  );//,,true, false);
-    NCVAutoTestLister testListerTranspose("NPPST Transpose"                 );//,,true, false);
+    generateRectStdDevTests(testListerRStdDev, testSrcRandom_8u, 4096, 4096);
 
-    NCVAutoTestLister testListerVectorOperations("Vector Operations"        );//,,true, false);
-    NCVAutoTestLister testListerHaarLoader("Haar Cascade Loader"            );//,,true, false);
-    NCVAutoTestLister testListerHaarAppl("Haar Cascade Application"         );//,,true, false);
-    NCVAutoTestLister testListerHypFiltration("Hypotheses Filtration"       );//,,true, false);
-    NCVAutoTestLister testListerVisualize("Visualization"                   );//,,true, false);
+    return testListerRStdDev.invoke();
+}
+
+bool nvidia_NPPST_Resize(const std::string& test_data_path, OutputLevel outputLevel)
+{
+	path = test_data_path;
+    ncvSetDebugOutputHandler(devNullOutput);
+
+    NCVAutoTestLister testListerResize("NPPST Resize", outputLevel);
 
-    printf("Initializing data source providers\n");
     NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 4096, 4096);
-    NCVTestSourceProvider<Ncv8u> testSrcRandom_8u(2010, 0, 255, 4096, 4096);
     NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, -1, 4096, 4096);
-    NCVTestSourceProvider<Ncv8u> testSrcFacesVGA_8u(path + "group_1_640x480_VGA.pgm");
-    NCVTestSourceProvider<Ncv32f> testSrcRandom_32f(2010, -1.0f, 1.0f, 4096, 4096);
 
-    printf("Generating NPPST test suites\n");
-    generateIntegralTests<Ncv8u, Ncv32u>(testListerII, testSrcRandom_8u, 4096, 4096);
-    generateIntegralTests<Ncv32f, Ncv32f>(testListerII, testSrcRandom_32f, 4096, 4096);
-    generateSquaredIntegralTests(testListerSII, testSrcRandom_8u, 4096, 4096);
-    generateRectStdDevTests(testListerRStdDev, testSrcRandom_8u, 4096, 4096);
     generateResizeTests(testListerResize, testSrcRandom_32u);
     generateResizeTests(testListerResize, testSrcRandom_64u);
+
+    return testListerResize.invoke();
+}
+
+bool nvidia_NPPST_Vector_Operations(const std::string& test_data_path, OutputLevel outputLevel)
+{
+	path = test_data_path; // 'path' is declared outside this function
+    ncvSetDebugOutputHandler(devNullOutput);
+    // Lister for the "NPPST Vector Operations" suite, using the caller's output level.
+    NCVAutoTestLister testListerNPPSTVectorOperations("NPPST Vector Operations", outputLevel);
+    
+    NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 4096, 4096);
+
     generateNPPSTVectorTests(testListerNPPSTVectorOperations, testSrcRandom_32u, 4096*4096);
+
+    return testListerNPPSTVectorOperations.invoke(); // result of invoke() is returned unchanged
+}
+
+bool nvidia_NPPST_Transpose(const std::string& test_data_path, OutputLevel outputLevel)
+{
+	path = test_data_path;
+    ncvSetDebugOutputHandler(devNullOutput);
+
+    NCVAutoTestLister testListerTranspose("NPPST Transpose", outputLevel);
+    
+    NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 4096, 4096);
+    NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, -1, 4096, 4096);
+
     generateTransposeTests(testListerTranspose, testSrcRandom_32u);
     generateTransposeTests(testListerTranspose, testSrcRandom_64u);
 
-    printf("Generating NCV test suites\n");
-    generateDrawRectsTests(testListerVisualize, testSrcRandom_8u, testSrcRandom_32u, 4096, 4096);
-    generateDrawRectsTests(testListerVisualize, testSrcRandom_32u, testSrcRandom_32u, 4096, 4096);
+    return testListerTranspose.invoke();
+}
+
+bool nvidia_NCV_Vector_Operations(const std::string& test_data_path, OutputLevel outputLevel)
+{
+	path = test_data_path;
+    ncvSetDebugOutputHandler(devNullOutput);
+    
+    NCVAutoTestLister testListerVectorOperations("Vector Operations", outputLevel);
+
+    NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 4096, 4096);
+    
     generateVectorTests(testListerVectorOperations, testSrcRandom_32u, 4096*4096);
-    generateHypothesesFiltrationTests(testListerHypFiltration, testSrcRandom_32u, 1024);
+
+    return testListerVectorOperations.invoke();
+}
+
+bool nvidia_NCV_Haar_Cascade_Loader(const std::string& test_data_path, OutputLevel outputLevel)
+{
+	path = test_data_path; // 'path' is declared outside this function
+    ncvSetDebugOutputHandler(devNullOutput);
+    // Lister for the "Haar Cascade Loader" suite; no source provider is created here.
+    NCVAutoTestLister testListerHaarLoader("Haar Cascade Loader", outputLevel);
+    
     generateHaarLoaderTests(testListerHaarLoader);
+
+    return testListerHaarLoader.invoke(); // result of invoke() is returned unchanged
+}
+
+bool nvidia_NCV_Haar_Cascade_Application(const std::string& test_data_path, OutputLevel outputLevel)
+{
+	path = test_data_path;
+    ncvSetDebugOutputHandler(devNullOutput);
+    
+    NCVAutoTestLister testListerHaarAppl("Haar Cascade Application", outputLevel);
+    
+    NCVTestSourceProvider<Ncv8u> testSrcFacesVGA_8u(path + "group_1_640x480_VGA.pgm");
+    
     generateHaarApplicationTests(testListerHaarAppl, testSrcFacesVGA_8u, 1280, 720);
 
-    // Indicate if at least one test failed
-    bool passed = true;
-
-    // Invoke all tests
-    passed &= testListerII.invoke();
-    passed &= testListerSII.invoke();
-    passed &= testListerRStdDev.invoke();
-    passed &= testListerResize.invoke();
-    passed &= testListerNPPSTVectorOperations.invoke();
-    passed &= testListerTranspose.invoke();
-    passed &= testListerVisualize.invoke();
-    passed &= testListerVectorOperations.invoke();
-    passed &= testListerHypFiltration.invoke();
-    passed &= testListerHaarLoader.invoke();
-    passed &= testListerHaarAppl.invoke();
-
-    return passed;
+    return testListerHaarAppl.invoke();
+}
+
+bool nvidia_NCV_Hypotheses_Filtration(const std::string& test_data_path, OutputLevel outputLevel)
+{
+    // Store the data location in 'path' and hand NCV debug output to devNullOutput.
+    path = test_data_path;
+    ncvSetDebugOutputHandler(devNullOutput);
+
+    NCVAutoTestLister suite("Hypotheses Filtration", outputLevel);
+    NCVTestSourceProvider<Ncv32u> random32u(2010, 0, 0xFFFFFFFF, 4096, 4096);
+
+    generateHypothesesFiltrationTests(suite, random32u, 1024);
+    // The function's result is whatever invoke() reports for this suite.
+    return suite.invoke();
+}
+
+bool nvidia_NCV_Visualization(const std::string& test_data_path, OutputLevel outputLevel)
+{
+    // Store the data location in 'path' and hand NCV debug output to devNullOutput.
+    path = test_data_path;
+    ncvSetDebugOutputHandler(devNullOutput);
+
+    NCVAutoTestLister suite("Visualization", outputLevel);
+    NCVTestSourceProvider<Ncv8u> random8u(2010, 0, 255, 4096, 4096);
+    NCVTestSourceProvider<Ncv32u> random32u(2010, 0, 0xFFFFFFFF, 4096, 4096);
+
+    // Two generateDrawRectsTests calls: first source 8-bit, then 32-bit; second source 32-bit in both.
+    generateDrawRectsTests(suite, random8u, random32u, 4096, 4096);
+    generateDrawRectsTests(suite, random32u, random32u, 4096, 4096);
+    return suite.invoke();
 }
diff --git a/modules/gpu/test/test_arithm.cpp b/modules/gpu/test/test_arithm.cpp
index 7e72b24afe..8ca0ceda2f 100644
--- a/modules/gpu/test/test_arithm.cpp
+++ b/modules/gpu/test/test_arithm.cpp
@@ -39,1026 +39,1609 @@
 //
 //M*/
 
-#include <iostream>
-#include <cmath>
-#include <limits>
 #include "test_precomp.hpp"
 
-using namespace cv;
-using namespace std;
-using namespace gpu;
+#ifdef HAVE_CUDA
 
-#define CHECK(pred, err) if (!(pred)) { \
-    ts->printf(cvtest::TS::CONSOLE, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \
-    ts->set_failed_test_info(err); \
-    return; }
-
-class CV_GpuArithmTest : public cvtest::BaseTest
+struct ArithmTest : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
 {
-public:
-    CV_GpuArithmTest(const char* /*test_name*/, const char* /*test_funcs*/){}
-    virtual ~CV_GpuArithmTest() {}
+    cv::gpu::DeviceInfo devInfo;
+    int type;
 
-protected:
-    void run(int);
+    cv::Size size;
+    cv::Mat mat1, mat2;
+        
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
 
-    int test(int type);
+        cv::gpu::setDevice(devInfo.deviceID());
 
-    virtual int test(const Mat& mat1, const Mat& mat2) = 0;
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-    int CheckNorm(const Mat& m1, const Mat& m2, double eps = 1e-5);
-    int CheckNorm(const Scalar& s1, const Scalar& s2, double eps = 1e-5);
-    int CheckNorm(double d1, double d2, double eps = 1e-5);
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
+        
+        mat1 = cvtest::randomMat(rng, size, type, 1, 16, false);
+        mat2 = cvtest::randomMat(rng, size, type, 1, 16, false);
+    }
 };
 
-int CV_GpuArithmTest::test(int type)
+////////////////////////////////////////////////////////////////////////////////
+// add
+
+struct AddArray : ArithmTest {};
+
+TEST_P(AddArray, Accuracy) 
 {
-    cv::Size sz(200, 200);
-    cv::Mat mat1(sz, type), mat2(sz, type);
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
     
-    cv::RNG& rng = ts->get_rng();
+    cv::Mat dst_gold;
+    cv::add(mat1, mat2, dst_gold);
 
-    if (type != CV_32FC1)
-    {
-        rng.fill(mat1, cv::RNG::UNIFORM, cv::Scalar::all(1), cv::Scalar::all(20));
-        rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(1), cv::Scalar::all(20));
-    }
-    else
-    {
-        rng.fill(mat1, cv::RNG::UNIFORM, cv::Scalar::all(0.1), cv::Scalar::all(1.0));
-        rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(0.1), cv::Scalar::all(1.0));
-    }
+    cv::Mat dst;
 
-    return test(mat1, mat2);
-}
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
 
-int CV_GpuArithmTest::CheckNorm(const Mat& m1, const Mat& m2, double eps)
-{
-    double ret = norm(m1, m2, NORM_INF);
+        cv::gpu::add(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuRes);
 
-    if (ret < eps)
-        return cvtest::TS::OK;
+        gpuRes.download(dst);
+    );
 
-    ts->printf(cvtest::TS::LOG, "\nNorm: %f\n", ret);
-    return cvtest::TS::FAIL_GENERIC;
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }
 
-int CV_GpuArithmTest::CheckNorm(const Scalar& s1, const Scalar& s2, double eps)
-{
-    int ret0 = CheckNorm(s1[0], s2[0], eps), 
-        ret1 = CheckNorm(s1[1], s2[1], eps), 
-        ret2 = CheckNorm(s1[2], s2[2], eps), 
-        ret3 = CheckNorm(s1[3], s2[3], eps);
+INSTANTIATE_TEST_CASE_P(Arithm, AddArray, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1)));
 
-    return (ret0 == cvtest::TS::OK && ret1 == cvtest::TS::OK && ret2 == cvtest::TS::OK && ret3 == cvtest::TS::OK) ? cvtest::TS::OK : cvtest::TS::FAIL_GENERIC;
-}
+struct AddScalar : ArithmTest {};
 
-int CV_GpuArithmTest::CheckNorm(double d1, double d2, double eps)
+TEST_P(AddScalar, Accuracy) 
 {
-    double ret = ::fabs(d1 - d2);
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
 
-    if (ret < eps)
-        return cvtest::TS::OK;
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-    ts->printf(cvtest::TS::LOG, "\nNorm: %f\n", ret);
-    return cvtest::TS::FAIL_GENERIC;
-}
+    cv::Scalar val(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0));
 
-void CV_GpuArithmTest::run( int )
-{
-    int testResult = cvtest::TS::OK;
+    PRINT_PARAM(val);
+    
+    cv::Mat dst_gold;
+    cv::add(mat1, val, dst_gold);
 
-    const int types[] = {CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1};
-    const char* type_names[] = {"CV_8UC1 ", "CV_8UC3 ", "CV_8UC4 ", "CV_32FC1"};
-    const int type_count = sizeof(types)/sizeof(types[0]);
+    cv::Mat dst;
 
-    //run tests
-    for (int t = 0; t < type_count; ++t)
-    {
-        ts->printf(cvtest::TS::LOG, "Start testing %s", type_names[t]);
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
 
-        if (cvtest::TS::OK == test(types[t]))
-            ts->printf(cvtest::TS::LOG, "SUCCESS\n");
-        else
-        {
-            ts->printf(cvtest::TS::LOG, "FAIL\n");
-            testResult = cvtest::TS::FAIL_MISMATCH;
-        }
-    }
+        cv::gpu::add(cv::gpu::GpuMat(mat1), val, gpuRes);
+
+        gpuRes.download(dst);
+    );
 
-    ts->set_failed_test_info(testResult);
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
 
+INSTANTIATE_TEST_CASE_P(Arithm, AddScalar, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_32FC1, CV_32FC2)));
+
 ////////////////////////////////////////////////////////////////////////////////
-// Add
+// subtract
+
+struct SubtractArray : ArithmTest {};
 
-struct CV_GpuNppImageAddTest : public CV_GpuArithmTest
+TEST_P(SubtractArray, Accuracy) 
 {
-    CV_GpuNppImageAddTest() : CV_GpuArithmTest( "GPU-NppImageAdd", "add" ) {}
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+    
+    cv::Mat dst_gold;
+    cv::subtract(mat1, mat2, dst_gold);
 
-        virtual int test(const Mat& mat1, const Mat& mat2)
-    {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+    cv::Mat dst;
 
-        cv::Mat cpuRes;
-        cv::add(mat1, mat2, cpuRes);
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
 
-        GpuMat gpu1(mat1);
-        GpuMat gpu2(mat2);
-        GpuMat gpuRes;
-        cv::gpu::add(gpu1, gpu2, gpuRes);
+        cv::gpu::subtract(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuRes);
 
-        return CheckNorm(cpuRes, gpuRes);
-    }
-};
+        gpuRes.download(dst);
+    );
 
-////////////////////////////////////////////////////////////////////////////////
-// Sub
-struct CV_GpuNppImageSubtractTest : public CV_GpuArithmTest
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, SubtractArray, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1)));
+
+struct SubtractScalar : ArithmTest {};
+
+TEST_P(SubtractScalar, Accuracy) // cv::gpu::subtract(GpuMat, Scalar) checked against cv::subtract
 {
-    CV_GpuNppImageSubtractTest() : CV_GpuArithmTest( "GPU-NppImageSubtract", "subtract" ) {}
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
 
-    int test( const Mat& mat1, const Mat& mat2 )
-    {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-        cv::Mat cpuRes;
-        cv::subtract(mat1, mat2, cpuRes);
+    cv::Scalar val(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0)); // two random channel values in [0.1, 3.0)
 
-        GpuMat gpu1(mat1);
-        GpuMat gpu2(mat2);
-        GpuMat gpuRes;
-        cv::gpu::subtract(gpu1, gpu2, gpuRes);
+    PRINT_PARAM(val);
+
+    cv::Mat dst_gold; // CPU reference result
+    cv::subtract(mat1, val, dst_gold);
 
-        return CheckNorm(cpuRes, gpuRes);
-    }
-};
+    cv::Mat dst; // GPU result, downloaded to host memory below
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::subtract(cv::gpu::GpuMat(mat1), val, gpuRes);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5); // same comparison macro and tolerance as the other *Scalar tests
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, SubtractScalar, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_32FC1, CV_32FC2)));
 
 ////////////////////////////////////////////////////////////////////////////////
 // multiply
-struct CV_GpuNppImageMultiplyTest : public CV_GpuArithmTest
+
+struct MultiplyArray : ArithmTest {};
+
+TEST_P(MultiplyArray, Accuracy) 
 {
-    CV_GpuNppImageMultiplyTest() : CV_GpuArithmTest( "GPU-NppImageMultiply", "multiply" ) {}
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+    
+    cv::Mat dst_gold;
+    cv::multiply(mat1, mat2, dst_gold);
 
-    int test( const Mat& mat1, const Mat& mat2 )
-    {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+    cv::Mat dst;
 
-	    cv::Mat cpuRes;
-	    cv::multiply(mat1, mat2, cpuRes);
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
 
-	    GpuMat gpu1(mat1);
-	    GpuMat gpu2(mat2);
-	    GpuMat gpuRes;
-	    cv::gpu::multiply(gpu1, gpu2, gpuRes);
+        cv::gpu::multiply(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuRes);
 
-            return CheckNorm(cpuRes, gpuRes);
-    }
-};
+        gpuRes.download(dst);
+    );
 
-////////////////////////////////////////////////////////////////////////////////
-// divide
-struct CV_GpuNppImageDivideTest : public CV_GpuArithmTest
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, MultiplyArray, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1)));
+
+struct MultiplyScalar : ArithmTest {};
+
+TEST_P(MultiplyScalar, Accuracy) 
 {
-    CV_GpuNppImageDivideTest() : CV_GpuArithmTest( "GPU-NppImageDivide", "divide" ) {}
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
 
-    int test( const Mat& mat1, const Mat& mat2 )
-    {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-	    cv::Mat cpuRes;
-	    cv::divide(mat1, mat2, cpuRes);
+    cv::Scalar val(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0));
 
-	    GpuMat gpu1(mat1);
-	    GpuMat gpu2(mat2);
-	    GpuMat gpuRes;
-	    cv::gpu::divide(gpu1, gpu2, gpuRes);
+    PRINT_PARAM(val);
+    
+    cv::Mat dst_gold;
+    cv::multiply(mat1, val, dst_gold);
 
-        return CheckNorm(cpuRes, gpuRes, 1.01f);
-    }
-};
+    cv::Mat dst;
 
-////////////////////////////////////////////////////////////////////////////////
-// transpose
-struct CV_GpuNppImageTransposeTest : public CV_GpuArithmTest
-{
-    CV_GpuNppImageTransposeTest() : CV_GpuArithmTest( "GPU-NppImageTranspose", "transpose" ) {}
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
 
-    int test( const Mat& mat1, const Mat& )
-    {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+        cv::gpu::multiply(cv::gpu::GpuMat(mat1), val, gpuRes);
 
-        cv::Mat cpuRes;
-        cv::transpose(mat1, cpuRes);
+        gpuRes.download(dst);
+    );
 
-        GpuMat gpu1(mat1);
-        GpuMat gpuRes;
-        cv::gpu::transpose(gpu1, gpuRes);
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
+}
 
-        return CheckNorm(cpuRes, gpuRes);
-    }
-};
+INSTANTIATE_TEST_CASE_P(Arithm, MultiplyScalar, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_32FC1)));
 
 ////////////////////////////////////////////////////////////////////////////////
-// absdiff
-struct CV_GpuNppImageAbsdiffTest : public CV_GpuArithmTest
+// divide
+
+struct DivideArray : ArithmTest {};
+
+TEST_P(DivideArray, Accuracy) 
 {
-    CV_GpuNppImageAbsdiffTest() : CV_GpuArithmTest( "GPU-NppImageAbsdiff", "absdiff" ) {}
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+    
+    cv::Mat dst_gold;
+    cv::divide(mat1, mat2, dst_gold);
 
-    int test( const Mat& mat1, const Mat& mat2 )
-    {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+    cv::Mat dst;
 
-        cv::Mat cpuRes;
-        cv::absdiff(mat1, mat2, cpuRes);
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
 
-        GpuMat gpu1(mat1);
-        GpuMat gpu2(mat2);
-        GpuMat gpuRes;
-        cv::gpu::absdiff(gpu1, gpu2, gpuRes);
+        cv::gpu::divide(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuRes);
 
-        return CheckNorm(cpuRes, gpuRes);
-    }
-};
+        gpuRes.download(dst);
+    );
 
-////////////////////////////////////////////////////////////////////////////////
-// compare
-struct CV_GpuNppImageCompareTest : public CV_GpuArithmTest
+    EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, DivideArray, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1)));
+
+struct DivideScalar : ArithmTest {};
+
+TEST_P(DivideScalar, Accuracy) 
 {
-    CV_GpuNppImageCompareTest() : CV_GpuArithmTest( "GPU-NppImageCompare", "compare" ) {}
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
 
-    int test( const Mat& mat1, const Mat& mat2 )
-    {
-        if (mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-        int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE};
-        const char* cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"};
-        int cmp_num = sizeof(cmp_codes) / sizeof(int);
+    cv::Scalar val(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0));
 
-        int test_res = cvtest::TS::OK;
+    PRINT_PARAM(val);
+    
+    cv::Mat dst_gold;
+    cv::divide(mat1, val, dst_gold);
 
-        for (int i = 0; i < cmp_num; ++i)
-        {
-            ts->printf(cvtest::TS::LOG, "\nCompare operation: %s\n", cmp_str[i]);
+    cv::Mat dst;
 
-            cv::Mat cpuRes;
-            cv::compare(mat1, mat2, cpuRes, cmp_codes[i]);
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
 
-            GpuMat gpu1(mat1);
-            GpuMat gpu2(mat2);
-            GpuMat gpuRes;
-            cv::gpu::compare(gpu1, gpu2, gpuRes, cmp_codes[i]);
+        cv::gpu::divide(cv::gpu::GpuMat(mat1), val, gpuRes);
 
-            if (CheckNorm(cpuRes, gpuRes) != cvtest::TS::OK)
-                test_res = cvtest::TS::FAIL_GENERIC;
-        }
+        gpuRes.download(dst);
+    );
 
-        return test_res;
-    }
-};
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, DivideScalar, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_32FC1)));
 
 ////////////////////////////////////////////////////////////////////////////////
-// meanStdDev
-struct CV_GpuNppImageMeanStdDevTest : public CV_GpuArithmTest
-{
-    CV_GpuNppImageMeanStdDevTest() : CV_GpuArithmTest( "GPU-NppImageMeanStdDev", "meanStdDev" ) {}
+// transpose
 
-    int test( const Mat& mat1, const Mat& )
-    {
-        if (mat1.type() != CV_8UC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+struct Transpose : ArithmTest {};
 
-        Scalar cpumean;
-        Scalar cpustddev;
-        cv::meanStdDev(mat1, cpumean, cpustddev);
+TEST_P(Transpose, Accuracy) 
+{
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
 
-        GpuMat gpu1(mat1);
-        Scalar gpumean;
-        Scalar gpustddev;
-        cv::gpu::meanStdDev(gpu1, gpumean, gpustddev);
+    cv::Mat dst_gold;
+    cv::transpose(mat1, dst_gold);
 
-        int test_res = cvtest::TS::OK;
+    cv::Mat dst;
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
 
-        if (CheckNorm(cpumean, gpumean) != cvtest::TS::OK)
-        {
-            ts->printf(cvtest::TS::LOG, "\nMean FAILED\n");
-            test_res = cvtest::TS::FAIL_GENERIC;
-        }
+        cv::gpu::transpose(cv::gpu::GpuMat(mat1), gpuRes);
 
-        if (CheckNorm(cpustddev, gpustddev) != cvtest::TS::OK)
-        {
-            ts->printf(cvtest::TS::LOG, "\nStdDev FAILED\n");
-            test_res = cvtest::TS::FAIL_GENERIC;
-        }
+        gpuRes.download(dst);
+    );
 
-        return test_res;
-    }
-};
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Transpose, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8UC1, CV_8UC4, CV_8SC1, CV_8SC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32SC2, CV_32FC1, CV_32FC2, CV_64FC1)));
 
 ////////////////////////////////////////////////////////////////////////////////
-// norm
-struct CV_GpuNppImageNormTest : public CV_GpuArithmTest
-{
-    CV_GpuNppImageNormTest() : CV_GpuArithmTest( "GPU-NppImageNorm", "norm" ) {}
+// absdiff
 
-    int test( const Mat& mat1, const Mat& mat2 )
-    {
-        if (mat1.type() != CV_8UC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+struct AbsdiffArray : ArithmTest {};
 
-        int norms[] = {NORM_INF, NORM_L1, NORM_L2};
-        const char* norms_str[] = {"NORM_INF", "NORM_L1", "NORM_L2"};
-        int norms_num = sizeof(norms) / sizeof(int);
+TEST_P(AbsdiffArray, Accuracy) 
+{
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+    
+    cv::Mat dst_gold;
+    cv::absdiff(mat1, mat2, dst_gold);
 
-        int test_res = cvtest::TS::OK;
+    cv::Mat dst;
 
-        for (int i = 0; i < norms_num; ++i)
-        {
-            ts->printf(cvtest::TS::LOG, "\nNorm type: %s\n", norms_str[i]);
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
 
-            double cpu_norm = cv::norm(mat1, mat2, norms[i]);
+        cv::gpu::absdiff(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuRes);
 
-            GpuMat gpu1(mat1);
-            GpuMat gpu2(mat2);
-            double gpu_norm = cv::gpu::norm(gpu1, gpu2, norms[i]);
+        gpuRes.download(dst);
+    );
 
-            if (CheckNorm(cpu_norm, gpu_norm) != cvtest::TS::OK)
-                test_res = cvtest::TS::FAIL_GENERIC;
-        }
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
 
-        return test_res;
-    }
-};
+INSTANTIATE_TEST_CASE_P(Arithm, AbsdiffArray, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1)));
 
-////////////////////////////////////////////////////////////////////////////////
-// flip
-struct CV_GpuNppImageFlipTest : public CV_GpuArithmTest
+struct AbsdiffScalar : ArithmTest {};
+
+TEST_P(AbsdiffScalar, Accuracy) 
 {
-    CV_GpuNppImageFlipTest() : CV_GpuArithmTest( "GPU-NppImageFlip", "flip" ) {}
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
 
-    int test( const Mat& mat1, const Mat& )
-    {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-        int flip_codes[] = {0, 1, -1};
-        const char* flip_axis[] = {"X", "Y", "Both"};
-        int flip_codes_num = sizeof(flip_codes) / sizeof(int);
+    cv::Scalar val(rng.uniform(0.1, 3.0), rng.uniform(0.1, 3.0));
 
-        int test_res = cvtest::TS::OK;
+    PRINT_PARAM(val);
+    
+    cv::Mat dst_gold;
+    cv::absdiff(mat1, val, dst_gold);
 
-        for (int i = 0; i < flip_codes_num; ++i)
-        {
-            ts->printf(cvtest::TS::LOG, "\nFlip Axis: %s\n", flip_axis[i]);
+    cv::Mat dst;
 
-            Mat cpu_res;
-            cv::flip(mat1, cpu_res, flip_codes[i]);
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
 
-            GpuMat gpu1(mat1);
-            GpuMat gpu_res;
-            cv::gpu::flip(gpu1, gpu_res, flip_codes[i]);
+        cv::gpu::absdiff(cv::gpu::GpuMat(mat1), val, gpuRes);
 
-            if (CheckNorm(cpu_res, gpu_res) != cvtest::TS::OK)
-                test_res = cvtest::TS::FAIL_GENERIC;
-        }
+        gpuRes.download(dst);
+    );
 
-        return test_res;
-    }
-};
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, AbsdiffScalar, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_32FC1)));
 
 ////////////////////////////////////////////////////////////////////////////////
-// LUT
-struct CV_GpuNppImageLUTTest : public CV_GpuArithmTest
+// compare
+
+struct Compare : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for cv::gpu::compare; parameters are (device, comparison code).
 {
-    CV_GpuNppImageLUTTest() : CV_GpuArithmTest( "GPU-NppImageLUT", "LUT" ) {}
+    cv::gpu::DeviceInfo devInfo;
+    int cmp_code; // comparison code passed to both cv::compare and cv::gpu::compare
+
+    cv::Size size;
+    cv::Mat mat1, mat2; // random CV_32FC1 operands
 
-    int test( const Mat& mat1, const Mat& )
+    cv::Mat dst_gold; // CPU reference result, filled in SetUp()
+        
+    virtual void SetUp()
     {
-        if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC3)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+        devInfo = std::tr1::get<0>(GetParam());
+        cmp_code = std::tr1::get<1>(GetParam());
 
-        cv::Mat lut(1, 256, CV_8UC1);
-        cv::RNG& rng = ts->get_rng();
-        rng.fill(lut, cv::RNG::UNIFORM, cv::Scalar::all(100), cv::Scalar::all(200));
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
 
-        cv::Mat cpuRes;
-        cv::LUT(mat1, lut, cpuRes);
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-        cv::gpu::GpuMat gpuRes;
-        cv::gpu::LUT(GpuMat(mat1), lut, gpuRes);
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
+        
+        mat1 = cvtest::randomMat(rng, size, CV_32FC1, 1, 16, false);
+        mat2 = cvtest::randomMat(rng, size, CV_32FC1, 1, 16, false);
 
-        return CheckNorm(cpuRes, gpuRes);
+        cv::compare(mat1, mat2, dst_gold, cmp_code); // compute the CPU reference
     }
 };
 
-////////////////////////////////////////////////////////////////////////////////
-// exp
-struct CV_GpuNppImageExpTest : public CV_GpuArithmTest
+TEST_P(Compare, Accuracy) // The GPU comparison mask must equal the CPU reference exactly.
 {
-    CV_GpuNppImageExpTest() : CV_GpuArithmTest( "GPU-NppImageExp", "exp" ) {}
+    static const char* cmp_codes[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"}; // printable names, indexed by cmp_code
+    const char* cmpCodeStr = cmp_codes[cmp_code]; // assumes the cv::CMP_* values are 0..5 in the order listed above - TODO confirm
 
-    int test( const Mat& mat1, const Mat& )
-    {
-        if (mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+    PRINT_PARAM(cmpCodeStr);
+
+    cv::Mat dst; // GPU result downloaded to host memory
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
 
-        cv::Mat cpuRes;
-        cv::exp(mat1, cpuRes);
+        cv::gpu::compare(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuRes, cmp_code);
 
-        GpuMat gpu1(mat1);
-        GpuMat gpuRes;
-        cv::gpu::exp(gpu1, gpuRes);
+        gpuRes.download(dst);
+    );
 
-        return CheckNorm(cpuRes, gpuRes);
-    }
-};
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // zero tolerance
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Compare, testing::Combine( // one test per (device, comparison code)
+                        testing::ValuesIn(devices()),
+                        testing::Values(cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)));
 
 ////////////////////////////////////////////////////////////////////////////////
-// log
-struct CV_GpuNppImageLogTest : public CV_GpuArithmTest
+// meanStdDev
+
+struct MeanStdDev : testing::TestWithParam<cv::gpu::DeviceInfo> // Fixture for cv::gpu::meanStdDev; parameterized by device.
 {
-    CV_GpuNppImageLogTest() : CV_GpuArithmTest( "GPU-NppImageLog", "log" ) {}
+    cv::gpu::DeviceInfo devInfo;
 
-    int test( const Mat& mat1, const Mat& )
+    cv::Size size;
+    cv::Mat mat; // random CV_8UC1 input
+
+    cv::Scalar mean_gold; // CPU reference mean
+    cv::Scalar stddev_gold; // CPU reference standard deviation
+
+    virtual void SetUp() 
     {
-        if (mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+        devInfo = GetParam();
 
-        cv::Mat cpuRes;
-        cv::log(mat1, cpuRes);
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
 
-        GpuMat gpu1(mat1);
-        GpuMat gpuRes;
-        cv::gpu::log(gpu1, gpuRes);
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-        return CheckNorm(cpuRes, gpuRes);
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
+        
+        mat = cvtest::randomMat(rng, size, CV_8UC1, 1, 255, false);
+
+        cv::meanStdDev(mat, mean_gold, stddev_gold); // compute the CPU reference
     }
 };
 
+TEST_P(MeanStdDev, Accuracy) // GPU mean/stddev must match the CPU reference within 1e-5 per component.
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+
+    cv::Scalar mean;
+    cv::Scalar stddev;
+    
+    ASSERT_NO_THROW(
+        cv::gpu::meanStdDev(cv::gpu::GpuMat(mat), mean, stddev);
+    );
+
+    EXPECT_NEAR(mean_gold[0], mean[0], 1e-5); // all four Scalar components are checked
+    EXPECT_NEAR(mean_gold[1], mean[1], 1e-5);
+    EXPECT_NEAR(mean_gold[2], mean[2], 1e-5);
+    EXPECT_NEAR(mean_gold[3], mean[3], 1e-5);
+
+    EXPECT_NEAR(stddev_gold[0], stddev[0], 1e-5);
+    EXPECT_NEAR(stddev_gold[1], stddev[1], 1e-5);
+    EXPECT_NEAR(stddev_gold[2], stddev[2], 1e-5);
+    EXPECT_NEAR(stddev_gold[3], stddev[3], 1e-5);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, testing::ValuesIn(devices())); // one test per device
+
 ////////////////////////////////////////////////////////////////////////////////
-// magnitude
-struct CV_GpuNppImageMagnitudeTest : public CV_GpuArithmTest
+// normDiff
+
+static const int norms[] = {cv::NORM_INF, cv::NORM_L1, cv::NORM_L2}; // norm types under test, selected by normIdx
+static const char* norms_str[] = {"NORM_INF", "NORM_L1", "NORM_L2"}; // printable names, same order as norms[]
+
+struct NormDiff : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for the two-matrix cv::gpu::norm; parameters are (device, index into norms[]).
 {
-    CV_GpuNppImageMagnitudeTest() : CV_GpuArithmTest( "GPU-NppImageMagnitude", "magnitude" ) {}
+    cv::gpu::DeviceInfo devInfo;
+    int normIdx; // index into norms[] / norms_str[]
+
+    cv::Size size;
+    cv::Mat mat1, mat2; // random CV_8UC1 operands
+
+    double norm_gold; // CPU reference value
 
-    int test( const Mat& mat1, const Mat& mat2 )
+    virtual void SetUp() 
     {
-        if (mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+        devInfo = std::tr1::get<0>(GetParam());
+        normIdx = std::tr1::get<1>(GetParam());
 
-        cv::Mat cpuRes;
-        cv::magnitude(mat1, mat2, cpuRes);
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
 
-        GpuMat gpu1(mat1);
-        GpuMat gpu2(mat2);
-        GpuMat gpuRes;
-        cv::gpu::magnitude(gpu1, gpu2, gpuRes);
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-        return CheckNorm(cpuRes, gpuRes);
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
+        
+        mat1 = cvtest::randomMat(rng, size, CV_8UC1, 1, 255, false);
+        mat2 = cvtest::randomMat(rng, size, CV_8UC1, 1, 255, false);
+
+        norm_gold = cv::norm(mat1, mat2, norms[normIdx]); // compute the CPU reference
     }
 };
 
+TEST_P(NormDiff, Accuracy) // GPU difference norm must match the CPU reference within 1e-6.
+{
+    const char* normStr = norms_str[normIdx];
+
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+    PRINT_PARAM(normStr);
+    
+    double norm; // assigned inside ASSERT_NO_THROW below
+    
+    ASSERT_NO_THROW(
+        norm = cv::gpu::norm(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), norms[normIdx]);
+    );
+
+    EXPECT_NEAR(norm_gold, norm, 1e-6);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, NormDiff, testing::Combine( // one test per (device, norm index)
+                        testing::ValuesIn(devices()),
+                        testing::Range(0, 3))); // indices 0..2 into norms[]
+
 ////////////////////////////////////////////////////////////////////////////////
-// phase
-struct CV_GpuNppImagePhaseTest : public CV_GpuArithmTest
+// flip
+
+struct Flip : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> > // Fixture for cv::gpu::flip; parameters are (device, matrix type, flip code).
 {
-    CV_GpuNppImagePhaseTest() : CV_GpuArithmTest( "GPU-NppImagePhase", "phase" ) {}
+    cv::gpu::DeviceInfo devInfo;
+    int type; // matrix type of the input
+    int flip_code; // flip code passed to both cv::flip and cv::gpu::flip
+
+    cv::Size size;
+    cv::Mat mat;
+
+    cv::Mat dst_gold; // CPU reference result, filled in SetUp()
 
-    int test( const Mat& mat1, const Mat& mat2 )
+    virtual void SetUp() 
     {
-        if (mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+        flip_code = std::tr1::get<2>(GetParam());
 
-        cv::Mat cpuRes;
-        cv::phase(mat1, mat2, cpuRes, true);
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
 
-        GpuMat gpu1(mat1);
-        GpuMat gpu2(mat2);
-        GpuMat gpuRes;
-        cv::gpu::phase(gpu1, gpu2, gpuRes, true);
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-        return CheckNorm(cpuRes, gpuRes, 0.3f);
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
+        
+        mat = cvtest::randomMat(rng, size, type, 1, 255, false);
+
+        cv::flip(mat, dst_gold, flip_code); // compute the CPU reference
     }
 };
 
-////////////////////////////////////////////////////////////////////////////////
-// cartToPolar
-struct CV_GpuNppImageCartToPolarTest : public CV_GpuArithmTest
+TEST_P(Flip, Accuracy) // The GPU flip result must equal the CPU reference exactly.
 {
-    CV_GpuNppImagePhaseTest() : CV_GpuArithmTest( "GPU-NppImagePhase", "phase" ) {}
+    static const char* flip_axis[] = {"Both", "X", "Y"}; // printable names for flip codes -1, 0, 1
+    const char* flipAxisStr = flip_axis[flip_code + 1]; // +1 shifts the code range -1..1 onto indices 0..2
 
-    int test( const Mat& mat1, const Mat& mat2 )
-    {
-        if (mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+    PRINT_PARAM(flipAxisStr);
+    
+    cv::Mat dst; // GPU result downloaded to host memory
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpu_res;
 
-        cv::Mat cpuMag, cpuAngle;
-        cv::cartToPolar(mat1, mat2, cpuMag, cpuAngle);
+        cv::gpu::flip(cv::gpu::GpuMat(mat), gpu_res, flip_code);
 
-        GpuMat gpu1(mat1);
-        GpuMat gpu2(mat2);
-        GpuMat gpuMag, gpuAngle;
-        cv::gpu::cartToPolar(gpu1, gpu2, gpuMag, gpuAngle);
+        gpu_res.download(dst);
+    );
 
-        int magRes = CheckNorm(cpuMag, gpuMag);
-        int angleRes = CheckNorm(cpuAngle, gpuAngle, 0.005f);
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // zero tolerance
+}
 
-        return magRes == cvtest::TS::OK && angleRes == cvtest::TS::OK ? cvtest::TS::OK : cvtest::TS::FAIL_GENERIC;
-    }
-};
+INSTANTIATE_TEST_CASE_P(Arithm, Flip, testing::Combine( // one test per (device, matrix type, flip code)
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8UC1, CV_8UC4),
+                        testing::Values(0, 1, -1)));
 
 ////////////////////////////////////////////////////////////////////////////////
-// polarToCart
-struct CV_GpuNppImagePolarToCartTest : public CV_GpuArithmTest
+// LUT
+
+struct LUT : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for cv::gpu::LUT; parameters are (device, matrix type).
 {
-    CV_GpuNppImagePolarToCartTest() : CV_GpuArithmTest( "GPU-NppImagePolarToCart", "polarToCart" ) {}
+    cv::gpu::DeviceInfo devInfo;
+    int type; // matrix type of the input
 
-    int test( const Mat& mat1, const Mat& mat2 )
+    cv::Size size;
+    cv::Mat mat;
+    cv::Mat lut; // 256-entry CV_8UC1 lookup table
+
+    cv::Mat dst_gold; // CPU reference result, filled in SetUp()
+
+    virtual void SetUp() 
     {
-        if (mat1.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\tUnsupported type\t");
-            return cvtest::TS::OK;
-        }
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
 
-        cv::Mat cpuX, cpuY;
-        cv::polarToCart(mat1, mat2, cpuX, cpuY);
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
 
-        GpuMat gpu1(mat1);
-        GpuMat gpu2(mat2);
-        GpuMat gpuX, gpuY;
-        cv::gpu::polarToCart(gpu1, gpu2, gpuX, gpuY);
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-        int xRes = CheckNorm(cpuX, gpuX);
-        int yRes = CheckNorm(cpuY, gpuY);
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
+        
+        mat = cvtest::randomMat(rng, size, type, 1, 255, false);
+        lut = cvtest::randomMat(rng, cv::Size(256, 1), CV_8UC1, 100, 200, false); // single row of 256 random entries
 
-        return xRes == cvtest::TS::OK && yRes == cvtest::TS::OK ? cvtest::TS::OK : cvtest::TS::FAIL_GENERIC;
+        cv::LUT(mat, lut, dst_gold); // compute the CPU reference
     }
 };
 
-////////////////////////////////////////////////////////////////////////////////
-// Min max
-
-struct CV_GpuMinMaxTest: public cvtest::BaseTest
+TEST_P(LUT, Accuracy) // The GPU lookup-table result must equal the CPU reference exactly.
 {
-    CV_GpuMinMaxTest() {}
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
 
-    cv::gpu::GpuMat buf;
+    cv::Mat dst; // GPU result downloaded to host memory
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpu_res;
 
-    void run(int)
-    {
-        bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) &&
-                         gpu::DeviceInfo().supports(gpu::NATIVE_DOUBLE);
-        int depth_end = double_ok ? CV_64F : CV_32F;
+        cv::gpu::LUT(cv::gpu::GpuMat(mat), lut, gpu_res);
 
-        for (int depth = CV_8U; depth <= depth_end; ++depth)
-        {
-            for (int i = 0; i < 3; ++i)
-            {
-                int rows = 1 + rand() % 1000;
-                int cols = 1 + rand() % 1000;
-                test(rows, cols, 1, depth);
-                test_masked(rows, cols, 1, depth);
-            }
-        }
-    }
+        gpu_res.download(dst);
+    );
 
-    void test(int rows, int cols, int cn, int depth)
-    {
-        cv::Mat src(rows, cols, CV_MAKE_TYPE(depth, cn));
-        cv::RNG& rng = ts->get_rng();
-        rng.fill(src, RNG::UNIFORM, Scalar(0), Scalar(255));
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // zero tolerance
+}
 
-        double minVal, maxVal;
-        cv::Point minLoc, maxLoc;
+INSTANTIATE_TEST_CASE_P(Arithm, LUT, testing::Combine( // one test per (device, matrix type)
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8UC1, CV_8UC3)));
 
-        if (depth != CV_8S)
-        {
-            cv::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc);
-        }
-        else 
-        {
-            minVal = std::numeric_limits<double>::max();
-            maxVal = -std::numeric_limits<double>::max();
-            for (int i = 0; i < src.rows; ++i)
-                for (int j = 0; j < src.cols; ++j)
-                {
-                    signed char val = src.at<signed char>(i, j);
-                    if (val < minVal) minVal = val;
-                    if (val > maxVal) maxVal = val;
-                }
-        }
+////////////////////////////////////////////////////////////////////////////////
+// exp
 
-        double minVal_, maxVal_;
-        cv::gpu::minMax(cv::gpu::GpuMat(src), &minVal_, &maxVal_, cv::gpu::GpuMat(), buf);
-       
-        if (abs(minVal - minVal_) > 1e-3f)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "\nfail: minVal=%f minVal_=%f rows=%d cols=%d depth=%d cn=%d\n", minVal, minVal_, rows, cols, depth, cn);
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-        }
-        if (abs(maxVal - maxVal_) > 1e-3f)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "\nfail: maxVal=%f maxVal_=%f rows=%d cols=%d depth=%d cn=%d\n", maxVal, maxVal_, rows, cols, depth, cn);
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-        }
-    }  
+struct Exp : testing::TestWithParam<cv::gpu::DeviceInfo> // Fixture for cv::gpu::exp; parameterized by device.
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    cv::Size size;
+    cv::Mat mat; // random CV_32FC1 input
 
-    void test_masked(int rows, int cols, int cn, int depth)
+    cv::Mat dst_gold; // CPU reference result, filled in SetUp()
+
+    virtual void SetUp() 
     {
-        cv::Mat src(rows, cols, CV_MAKE_TYPE(depth, cn));
-        cv::RNG& rng = ts->get_rng();
-        rng.fill(src, RNG::UNIFORM, Scalar(0), Scalar(255));
+        devInfo = GetParam();
 
-        cv::Mat mask(src.size(), CV_8U);
-        rng.fill(mask, RNG::UNIFORM, Scalar(0), Scalar(2));
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
 
-        double minVal, maxVal;
-        cv::Point minLoc, maxLoc;
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-        Mat src_ = src.reshape(1);
-        if (depth != CV_8S)
-        {
-            cv::minMaxLoc(src_, &minVal, &maxVal, &minLoc, &maxLoc, mask);
-        }
-        else 
-        {
-            // OpenCV's minMaxLoc doesn't support CV_8S type 
-            minVal = std::numeric_limits<double>::max();
-            maxVal = -std::numeric_limits<double>::max();
-            for (int i = 0; i < src_.rows; ++i)
-                for (int j = 0; j < src_.cols; ++j)
-                {
-                    char val = src_.at<char>(i, j);
-                    if (mask.at<unsigned char>(i, j)) { if (val < minVal) minVal = val; }
-                    if (mask.at<unsigned char>(i, j)) { if (val > maxVal) maxVal = val; }
-                }
-        }
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
 
-        double minVal_, maxVal_;
-        cv::Point minLoc_, maxLoc_;        
-        cv::gpu::minMax(cv::gpu::GpuMat(src), &minVal_, &maxVal_, cv::gpu::GpuMat(mask), buf);
-       
-        if (abs(minVal - minVal_) > 1e-3f)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "\nfail: minVal=%f minVal_=%f rows=%d cols=%d depth=%d cn=%d\n", minVal, minVal_, rows, cols, depth, cn);
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-        }
-        if (abs(maxVal - maxVal_) > 1e-3f)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "\nfail: maxVal=%f maxVal_=%f rows=%d cols=%d depth=%d cn=%d\n", maxVal, maxVal_, rows, cols, depth, cn);
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-        }
-    }  
+        mat = cvtest::randomMat(rng, size, CV_32FC1, -10.0, 2.0, false); // exponents drawn from the range -10..2
+
+        cv::exp(mat, dst_gold); // compute the CPU reference
+    }
+};
+
+TEST_P(Exp, Accuracy) // GPU exp must match the CPU reference within 1e-5.
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+
+    cv::Mat dst; // GPU result downloaded to host memory
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpu_res;
+
+        cv::gpu::exp(cv::gpu::GpuMat(mat), gpu_res);
+
+        gpu_res.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Exp, testing::ValuesIn(devices())); // one test per device
+
+////////////////////////////////////////////////////////////////////////////////
+// log
+
+struct Log : testing::TestWithParam<cv::gpu::DeviceInfo> // Fixture for cv::gpu::log; parameterized by device.
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    cv::Size size;
+    cv::Mat mat; // random CV_32FC1 input
+
+    cv::Mat dst_gold; // CPU reference result, filled in SetUp()
+
+    virtual void SetUp() 
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
+
+        mat = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 100.0, false); // NOTE(review): lower bound 0.0 allows inputs at or near zero, where log diverges - confirm this is intended
+
+        cv::log(mat, dst_gold); // compute the CPU reference
+    }
+};
+
+TEST_P(Log, Accuracy) // GPU log must match the CPU reference within 1e-5.
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+
+    cv::Mat dst; // GPU result downloaded to host memory
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpu_res;
+
+        cv::gpu::log(cv::gpu::GpuMat(mat), gpu_res);
+
+        gpu_res.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Log, testing::ValuesIn(devices())); // one test per device
+
+////////////////////////////////////////////////////////////////////////////////
+// magnitude
+
+struct Magnitude : testing::TestWithParam<cv::gpu::DeviceInfo> // Fixture for cv::gpu::magnitude; parameterized by device.
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    cv::Size size;
+    cv::Mat mat1, mat2; // random CV_32FC1 operands
+
+    cv::Mat dst_gold; // CPU reference result, filled in SetUp()
+
+    virtual void SetUp() 
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
+
+        mat1 = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);
+        mat2 = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);
+
+        cv::magnitude(mat1, mat2, dst_gold); // compute the CPU reference
+    }
+};
+
+TEST_P(Magnitude, Accuracy) // GPU magnitude must match the CPU reference within 1e-4.
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+
+    cv::Mat dst; // GPU result downloaded to host memory
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpu_res;
+
+        cv::gpu::magnitude(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpu_res);
+
+        gpu_res.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-4);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, testing::ValuesIn(devices())); // one test per device
+
+////////////////////////////////////////////////////////////////////////////////
+// phase
+
+struct Phase : testing::TestWithParam<cv::gpu::DeviceInfo> // Fixture for cv::gpu::phase; parameterized by device.
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    cv::Size size;
+    cv::Mat mat1, mat2; // random CV_32FC1 operands
+
+    cv::Mat dst_gold; // CPU reference result, filled in SetUp()
+
+    virtual void SetUp() 
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
+
+        mat1 = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);
+        mat2 = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 100.0, false);
+
+        cv::phase(mat1, mat2, dst_gold); // CPU reference; the optional angle-unit argument is left at its default
+    }
+};
+
+TEST_P(Phase, Accuracy) // GPU phase must match the CPU reference within 1e-3.
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+
+    cv::Mat dst; // GPU result downloaded to host memory
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpu_res;
+
+        cv::gpu::phase(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpu_res);
+
+        gpu_res.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-3);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Phase, testing::ValuesIn(devices())); // one test per device
+
+////////////////////////////////////////////////////////////////////////////////
+// cartToPolar
+
+struct CartToPolar : testing::TestWithParam<cv::gpu::DeviceInfo> // Fixture for cv::gpu::cartToPolar; parameterized by device.
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    cv::Size size;
+    cv::Mat mat1, mat2; // random CV_32FC1 inputs passed as the two coordinate arrays
+
+    cv::Mat mag_gold; // CPU reference magnitude
+    cv::Mat angle_gold; // CPU reference angle
+
+    virtual void SetUp() 
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
+
+        mat1 = cvtest::randomMat(rng, size, CV_32FC1, -100.0, 100.0, false);
+        mat2 = cvtest::randomMat(rng, size, CV_32FC1, -100.0, 100.0, false);
+
+        cv::cartToPolar(mat1, mat2, mag_gold, angle_gold); // compute the CPU reference
+    }
+};
+
+TEST_P(CartToPolar, Accuracy) // GPU magnitude/angle must match the CPU reference (1e-4 / 1e-3).
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+
+    cv::Mat mag, angle; // GPU results downloaded to host memory
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuMag;
+        cv::gpu::GpuMat gpuAngle;
+
+        cv::gpu::cartToPolar(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), gpuMag, gpuAngle);
+
+        gpuMag.download(mag);
+        gpuAngle.download(angle);
+    );
+
+    EXPECT_MAT_NEAR(mag_gold, mag, 1e-4);
+    EXPECT_MAT_NEAR(angle_gold, angle, 1e-3); // the angle gets a looser tolerance than the magnitude
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, testing::ValuesIn(devices())); // one test per device
+
+////////////////////////////////////////////////////////////////////////////////
+// polarToCart
+
+struct PolarToCart : testing::TestWithParam<cv::gpu::DeviceInfo> // Fixture for cv::gpu::polarToCart; parameterized by device.
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    cv::Size size;
+    cv::Mat mag; // random CV_32FC1 magnitudes
+    cv::Mat angle; // random CV_32FC1 angles
+
+    cv::Mat x_gold; // CPU reference x output
+    cv::Mat y_gold; // CPU reference y output
+
+    virtual void SetUp() 
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
+
+        mag = cvtest::randomMat(rng, size, CV_32FC1, -100.0, 100.0, false); // NOTE(review): magnitudes may be negative here - confirm this is intended
+        angle = cvtest::randomMat(rng, size, CV_32FC1, 0.0, 2.0 * CV_PI, false); // angles drawn from the range 0..2*pi
+
+        cv::polarToCart(mag, angle, x_gold, y_gold); // compute the CPU reference
+    }
 };
 
+TEST_P(PolarToCart, Accuracy) // GPU x/y outputs must match the CPU reference within 1e-4.
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+
+    cv::Mat x, y; // GPU results downloaded to host memory
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuX;
+        cv::gpu::GpuMat gpuY;
+
+        cv::gpu::polarToCart(cv::gpu::GpuMat(mag), cv::gpu::GpuMat(angle), gpuX, gpuY);
+
+        gpuX.download(x);
+        gpuY.download(y);
+    );
+
+    EXPECT_MAT_NEAR(x_gold, x, 1e-4);
+    EXPECT_MAT_NEAR(y_gold, y, 1e-4);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, testing::ValuesIn(devices())); // one test per device
 
 ////////////////////////////////////////////////////////////////////////////////
-// Min max loc
+// minMax
 
-struct CV_GpuMinMaxLocTest: public cvtest::BaseTest
+struct MinMax : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for masked cv::gpu::minMax; parameters are (device, matrix type).
 {
-    CV_GpuMinMaxLocTest() {}
+    cv::gpu::DeviceInfo devInfo;
+    int type; // matrix type of the input
+
+    cv::Size size;
+    cv::Mat mat;
+    cv::Mat mask; // CV_8UC1 mask; non-zero entries select the elements considered
 
-    GpuMat valbuf, locbuf;
+    double minVal_gold; // CPU reference minimum over masked elements
+    double maxVal_gold; // CPU reference maximum over masked elements
 
-    void run(int)
+    virtual void SetUp() 
     {
-        bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) &&
-                         gpu::DeviceInfo().supports(gpu::NATIVE_DOUBLE);
-        int depth_end = double_ok ? CV_64F : CV_32F;
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
 
-        for (int depth = CV_8U; depth <= depth_end; ++depth)
+        mat = cvtest::randomMat(rng, size, type, 0.0, 127.0, false); // presumably capped at 127 so values fit every tested type, including CV_8S - TODO confirm
+        mask = cvtest::randomMat(rng, size, CV_8UC1, 0, 2, false); // presumably yields 0/1 entries (exclusive upper bound) - TODO confirm
+
+        if (type != CV_8S)
+        {
+            cv::minMaxLoc(mat, &minVal_gold, &maxVal_gold, 0, 0, mask); // locations are not requested
+        }
+        else 
         {
-            int rows = 1, cols = 3;
-            test(rows, cols, depth);
-            for (int i = 0; i < 4; ++i)
+            // OpenCV's minMaxLoc doesn't support CV_8S type 
+            minVal_gold = std::numeric_limits<double>::max(); // start from extreme sentinels so any masked element replaces them
+            maxVal_gold = -std::numeric_limits<double>::max();
+            for (int i = 0; i < mat.rows; ++i)
             {
-                int rows = 1 + rand() % 1000;
-                int cols = 1 + rand() % 1000;
-                test(rows, cols, depth);
+                const signed char* mat_row = mat.ptr<signed char>(i);
+                const unsigned char* mask_row = mask.ptr<unsigned char>(i);
+                for (int j = 0; j < mat.cols; ++j)
+                {
+                    if (mask_row[j]) // skip elements excluded by the mask
+                    { 
+                        signed char val = mat_row[j];
+                        if (val < minVal_gold) minVal_gold = val;
+                        if (val > maxVal_gold) maxVal_gold = val; 
+                    }
+                }
             }
         }
     }
+};
+
+TEST_P(MinMax, Accuracy) // GPU masked min/max must equal the CPU reference values.
+{
+    if (type == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) // skip CV_64F when the device lacks native double support
+        return;
 
-    void test(int rows, int cols, int depth)
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type); // terminated with ';' like the other PRINT_TYPE call sites in this file
+    PRINT_PARAM(size);
+
+    double minVal, maxVal; // written by cv::gpu::minMax below
+    
+    ASSERT_NO_THROW(
+        cv::gpu::minMax(cv::gpu::GpuMat(mat), &minVal, &maxVal, cv::gpu::GpuMat(mask));
+    );
+
+    EXPECT_DOUBLE_EQ(minVal_gold, minVal);
+    EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, MinMax, testing::Combine( // one test per (device, matrix type)
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)));
+
+////////////////////////////////////////////////////////////////////////////////
+// minMaxLoc
+
+struct MinMaxLoc : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for masked cv::gpu::minMaxLoc; parameters are (device, matrix type).
+{
+    cv::gpu::DeviceInfo devInfo;
+    int type; // matrix type of the input
+
+    cv::Size size;
+    cv::Mat mat;
+    cv::Mat mask; // CV_8UC1 mask; non-zero entries select the elements considered
+
+    double minVal_gold; // CPU reference minimum over masked elements
+    double maxVal_gold; // CPU reference maximum over masked elements
+    cv::Point minLoc_gold; // CPU reference location of the minimum
+    cv::Point maxLoc_gold; // CPU reference location of the maximum
+
+    virtual void SetUp() 
     {
-        cv::Mat src(rows, cols, depth);
-        cv::RNG& rng = ts->get_rng();
-        rng.fill(src, RNG::UNIFORM, Scalar(0), Scalar(255));
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
 
-        cv::Mat mask(src.size(), CV_8U);
-        rng.fill(mask, RNG::UNIFORM, Scalar(0), Scalar(2));
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
 
-        // At least one of the mask elements must be non zero as OpenCV returns 0
-        // in such case, when our implementation returns maximum or minimum value
-        mask.at<unsigned char>(0, 0) = 1;
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-        double minVal, maxVal;
-        cv::Point minLoc, maxLoc;
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
 
-        if (depth != CV_8S)       
-            cv::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc, mask);
+        mat = cvtest::randomMat(rng, size, type, 0.0, 127.0, false); // presumably capped at 127 so values fit every tested type, including CV_8S - TODO confirm
+        mask = cvtest::randomMat(rng, size, CV_8UC1, 0, 2, false); // NOTE(review): unlike the removed code, no mask element is forced non-zero - confirm an all-zero mask cannot occur
+
+        if (type != CV_8S)
+        {
+            cv::minMaxLoc(mat, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold, mask);
+        }
         else 
         {
             // OpenCV's minMaxLoc doesn't support CV_8S type 
-            minVal = std::numeric_limits<double>::max();
-            maxVal = -std::numeric_limits<double>::max();
-            for (int i = 0; i < src.rows; ++i)
-                for (int j = 0; j < src.cols; ++j)
+            minVal_gold = std::numeric_limits<double>::max(); // start from extreme sentinels so any masked element replaces them
+            maxVal_gold = -std::numeric_limits<double>::max();
+            for (int i = 0; i < mat.rows; ++i)
+            {
+                const signed char* mat_row = mat.ptr<signed char>(i);
+                const unsigned char* mask_row = mask.ptr<unsigned char>(i);
+                for (int j = 0; j < mat.cols; ++j)
                 {
-                    char val = src.at<char>(i, j);
-                    if (mask.at<unsigned char>(i, j))
-                    {
-                        if (val < minVal) { minVal = val; minLoc = cv::Point(j, i); }
-                        if (val > maxVal) { maxVal = val; maxLoc = cv::Point(j, i); }
+                    if (mask_row[j]) // skip elements excluded by the mask
+                    { 
+                        signed char val = mat_row[j];
+                        if (val < minVal_gold) { minVal_gold = val; minLoc_gold = cv::Point(j, i); } // Point is (x = column, y = row)
+                        if (val > maxVal_gold) { maxVal_gold = val; maxLoc_gold = cv::Point(j, i); }
                     }
                 }
+            }
         }
-
-        double minVal_, maxVal_;
-        cv::Point minLoc_, maxLoc_;        
-        cv::gpu::minMaxLoc(cv::gpu::GpuMat(src), &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::gpu::GpuMat(mask), valbuf, locbuf);
-
-        CHECK(minVal == minVal_, cvtest::TS::FAIL_INVALID_OUTPUT);
-        CHECK(maxVal == maxVal_, cvtest::TS::FAIL_INVALID_OUTPUT);
-        CHECK(0 == memcmp(src.ptr(minLoc.y) + minLoc.x * src.elemSize(), src.ptr(minLoc_.y) + minLoc_.x * src.elemSize(), src.elemSize()),  
-              cvtest::TS::FAIL_INVALID_OUTPUT);
-        CHECK(0 == memcmp(src.ptr(maxLoc.y) + maxLoc.x * src.elemSize(), src.ptr(maxLoc_.y) + maxLoc_.x * src.elemSize(), src.elemSize()),  
-              cvtest::TS::FAIL_INVALID_OUTPUT);
-    }  
+    }
 };
 
+TEST_P(MinMaxLoc, Accuracy) // GPU masked min/max values must equal the CPU reference, and the reported locations must hold those values.
+{
+    if (type == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) // skip CV_64F when the device lacks native double support
+        return;
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type); // terminated with ';' like the other PRINT_TYPE call sites in this file
+    PRINT_PARAM(size);
+
+    double minVal, maxVal; // written by cv::gpu::minMaxLoc below
+    cv::Point minLoc, maxLoc;
+    
+    ASSERT_NO_THROW(
+        cv::gpu::minMaxLoc(cv::gpu::GpuMat(mat), &minVal, &maxVal, &minLoc, &maxLoc, cv::gpu::GpuMat(mask));
+    );
+
+    EXPECT_DOUBLE_EQ(minVal_gold, minVal);
+    EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
+
+    int cmpMinVals = memcmp(mat.data + minLoc_gold.y * mat.step + minLoc_gold.x * mat.elemSize(), // compares the elements stored at the two locations, not the coordinates
+                            mat.data + minLoc.y * mat.step + minLoc.x * mat.elemSize(), 
+                            mat.elemSize());
+    int cmpMaxVals = memcmp(mat.data + maxLoc_gold.y * mat.step + maxLoc_gold.x * mat.elemSize(), // presumably because equal extrema may occur at several positions - TODO confirm
+                            mat.data + maxLoc.y * mat.step + maxLoc.x * mat.elemSize(), 
+                            mat.elemSize());
+
+    EXPECT_EQ(0, cmpMinVals);
+    EXPECT_EQ(0, cmpMaxVals);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, testing::Combine( // one test per (device, matrix type)
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)));
+
 ////////////////////////////////////////////////////////////////////////////
-// Count non zero
-struct CV_GpuCountNonZeroTest: cvtest::BaseTest 
+// countNonZero
+
+struct CountNonZero : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for cv::gpu::countNonZero; parameters are (device, matrix type).
 {
-    CV_GpuCountNonZeroTest(){}
+    cv::gpu::DeviceInfo devInfo;
+    int type; // matrix type the random base matrix is converted to
+
+    cv::Size size;
+    cv::Mat mat;
+
+    int n_gold; // CPU reference count
 
-    void run(int) 
+    virtual void SetUp() 
     {
-        int depth_end;
-        if (cv::gpu::DeviceInfo().supports(cv::gpu::NATIVE_DOUBLE))
-            depth_end = CV_64F;
-        else
-            depth_end = CV_32F;
-        for (int depth = CV_8U; depth <= CV_32F; ++depth)
-        {
-            for (int i = 0; i < 4; ++i)
-            {
-                int rows = 1 + rand() % 1000;
-                int cols = 1 + rand() % 1000;
-                test(rows, cols, depth);
-            }
-        }
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)); // random width and height
+
+        cv::Mat matBase = cvtest::randomMat(rng, size, CV_8U, 0.0, 2.0, false); // upper bound 2 (exclusive for integer types) so the data mixes 0 and 1; a bound of 1 gave all zeros
+        matBase.convertTo(mat, type); // same 0/1 pattern expressed in the type under test
+
+        n_gold = cv::countNonZero(mat); // compute the CPU reference
     }
+};
+
+TEST_P(CountNonZero, Accuracy) // Requires cv::gpu::countNonZero to return exactly the CPU count stored in n_gold.
+{
+    if (type == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // returns early, without running any check, for CV_64F on a device lacking NATIVE_DOUBLE
+
+    PRINT_PARAM(devInfo); // presumably logs the test parameters - confirm against the macro definitions
+    PRINT_TYPE(type)
+    PRINT_PARAM(size);
+
+    int n;
+    
+    ASSERT_NO_THROW( // an exception thrown by the GPU call fails the test here
+        n = cv::gpu::countNonZero(cv::gpu::GpuMat(mat));
+    );
+
+    ASSERT_EQ(n_gold, n);
+}
 
-    void test(int rows, int cols, int depth)
+INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, testing::Combine( // one test instance per (device, type) combination
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////////////
+// sum
+
+struct Sum : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for the cv::gpu::sum test; parameters are (device, matrix type).
+{
+    cv::gpu::DeviceInfo devInfo; // device taken from the first tuple element
+    int type; // second tuple element; see the review note on the randomMat call below
+
+    cv::Size size; // random size, both dimensions drawn with rng.uniform(100, 200)
+    cv::Mat mat; // host input handed to the GPU call
+
+    cv::Scalar sum_gold; // reference result from the CPU cv::sum
+
+    virtual void SetUp() // selects the device, generates the input and computes the CPU reference
     {
-        cv::Mat src(rows, cols, depth);
-        cv::RNG rng;
-        if (depth == 5)
-            rng.fill(src, RNG::UNIFORM, Scalar(-1000.f), Scalar(1000.f));
-        else if (depth == 6)
-            rng.fill(src, RNG::UNIFORM, Scalar(-1000.), Scalar(1000.));
-        else
-            for (int i = 0; i < src.rows; ++i)
-            { 
-                Mat row(1, src.cols * src.elemSize(), CV_8U, src.ptr(i));
-                rng.fill(row, RNG::UNIFORM, Scalar(0), Scalar(256));
-            }
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
+
+        mat = cvtest::randomMat(rng, size, CV_8U, 0.0, 10.0, false); // NOTE(review): input is always CV_8U; `type` is not used to build it, so every instantiated type exercises the same input type - confirm intent
+
+        sum_gold = cv::sum(mat);
+    }
+};
+
+TEST_P(Sum, Accuracy) // Compares cv::gpu::sum with the CPU reference in sum_gold, component by component.
+{
+    if (type == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // returns early, without running any check, when the parameter is CV_64F and the device lacks NATIVE_DOUBLE
+
+    PRINT_PARAM(devInfo); // presumably logs the test parameters - confirm against the macro definitions
+    PRINT_TYPE(type)
+    PRINT_PARAM(size);
+
+    cv::Scalar sum;
+    
+    ASSERT_NO_THROW( // an exception thrown by the GPU call fails the test here
+        sum = cv::gpu::sum(cv::gpu::GpuMat(mat));
+    );
+
+    EXPECT_NEAR(sum[0], sum_gold[0], mat.size().area() * 1e-5); // allowed error grows with the pixel count: 1e-5 per element
+    EXPECT_NEAR(sum[1], sum_gold[1], mat.size().area() * 1e-5);
+    EXPECT_NEAR(sum[2], sum_gold[2], mat.size().area() * 1e-5);
+    EXPECT_NEAR(sum[3], sum_gold[3], mat.size().area() * 1e-5);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Sum, testing::Combine( // one test instance per (device, type) combination
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)));
+
+struct AbsSum : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for the cv::gpu::absSum test; parameters are (device, matrix type).
+{
+    cv::gpu::DeviceInfo devInfo; // device taken from the first tuple element
+    int type; // second tuple element; see the review note on the randomMat call below
+
+    cv::Size size; // random size, both dimensions drawn with rng.uniform(100, 200)
+    cv::Mat mat; // host input handed to the GPU call
+
+    cv::Scalar sum_gold; // reference value: L1 norm of mat computed on the CPU
+
+    virtual void SetUp() // selects the device, generates the input and computes the CPU reference
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
+
+        mat = cvtest::randomMat(rng, size, CV_8U, 0.0, 10.0, false); // NOTE(review): input is always CV_8U; `type` is not used to build it - confirm intent
+
+        sum_gold = cv::norm(mat, cv::NORM_L1); // a single value is stored into the Scalar; presumably components 1..3 end up zero - confirm
+    }
+};
+
+TEST_P(AbsSum, Accuracy) // Compares cv::gpu::absSum with the L1-norm reference in sum_gold, component by component.
+{
+    if (type == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // returns early, without running any check, when the parameter is CV_64F and the device lacks NATIVE_DOUBLE
+
+    PRINT_PARAM(devInfo); // presumably logs the test parameters - confirm against the macro definitions
+    PRINT_TYPE(type)
+    PRINT_PARAM(size);
+
+    cv::Scalar sum;
+    
+    ASSERT_NO_THROW( // an exception thrown by the GPU call fails the test here
+        sum = cv::gpu::absSum(cv::gpu::GpuMat(mat));
+    );
+
+    EXPECT_NEAR(sum[0], sum_gold[0], mat.size().area() * 1e-5); // allowed error grows with the pixel count: 1e-5 per element
+    EXPECT_NEAR(sum[1], sum_gold[1], mat.size().area() * 1e-5);
+    EXPECT_NEAR(sum[2], sum_gold[2], mat.size().area() * 1e-5);
+    EXPECT_NEAR(sum[3], sum_gold[3], mat.size().area() * 1e-5);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, AbsSum, testing::Combine( // one test instance per (device, type) combination
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)));
+
+struct SqrSum : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for the cv::gpu::sqrSum test; parameters are (device, matrix type).
+{
+    cv::gpu::DeviceInfo devInfo; // device taken from the first tuple element
+    int type; // second tuple element; see the review note on the randomMat call below
+
+    cv::Size size; // random size, both dimensions drawn with rng.uniform(100, 200)
+    cv::Mat mat; // host input handed to the GPU call
+
+    cv::Scalar sum_gold; // reference value: CPU sum of the element-wise product mat * mat
+
+    virtual void SetUp() // selects the device, generates the input and computes the CPU reference
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
+
+        mat = cvtest::randomMat(rng, size, CV_8U, 0.0, 10.0, false); // NOTE(review): input is always CV_8U; `type` is not used to build it - confirm intent
+ 
+        cv::Mat sqrmat;
+        cv::multiply(mat, mat, sqrmat); // element-wise square; the result is then summed to form the reference
+        sum_gold = cv::sum(sqrmat);
+    }
+};
+
+TEST_P(SqrSum, Accuracy) // Compares cv::gpu::sqrSum with the sum-of-squares reference in sum_gold, component by component.
+{
+    if (type == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // returns early, without running any check, when the parameter is CV_64F and the device lacks NATIVE_DOUBLE
+
+    PRINT_PARAM(devInfo); // presumably logs the test parameters - confirm against the macro definitions
+    PRINT_TYPE(type)
+    PRINT_PARAM(size);
+
+    cv::Scalar sum;
+    
+    ASSERT_NO_THROW( // an exception thrown by the GPU call fails the test here
+        sum = cv::gpu::sqrSum(cv::gpu::GpuMat(mat));
+    );
+
+    EXPECT_NEAR(sum[0], sum_gold[0], mat.size().area() * 1e-5); // allowed error grows with the pixel count: 1e-5 per element
+    EXPECT_NEAR(sum[1], sum_gold[1], mat.size().area() * 1e-5);
+    EXPECT_NEAR(sum[2], sum_gold[2], mat.size().area() * 1e-5);
+    EXPECT_NEAR(sum[3], sum_gold[3], mat.size().area() * 1e-5);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, SqrSum, testing::Combine( // one test instance per (device, type) combination
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////////////
+// bitwise
+
+struct BitwiseNot : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for the cv::gpu::bitwise_not test; parameters are (device, matrix type).
+{
+    cv::gpu::DeviceInfo devInfo; // device taken from the first tuple element
+    int type; // matrix type taken from the second tuple element
+
+    cv::Size size; // random size, both dimensions drawn with rng.uniform(100, 200)
+    cv::Mat mat; // host input handed to the GPU call
+
+    cv::Mat dst_gold; // reference result: ~mat computed on the CPU
+
+    virtual void SetUp() // selects the device, fills the input with random bytes and computes the CPU reference
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-        int n_gold = cv::countNonZero(src);
-        int n = cv::gpu::countNonZero(cv::gpu::GpuMat(src));
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        if (n != n_gold)
+        mat.create(size, type);
+        
+        for (int i = 0; i < mat.rows; ++i)
         {
-            ts->printf(cvtest::TS::LOG, "%d %d %d %d %d\n", n, n_gold, depth, cols, rows);
-            n_gold = cv::countNonZero(src);
+            cv::Mat row(1, mat.cols * mat.elemSize(), CV_8U, (void*)mat.ptr(i)); // CV_8U header over row i's own memory, one column per byte of the row
+            rng.fill(row, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255)); // fills the raw bytes uniformly (bounds 0 and 255), independent of the element type
         }
 
-        CHECK(n == n_gold, cvtest::TS::FAIL_INVALID_OUTPUT);
+        dst_gold = ~mat;
     }
 };
 
+TEST_P(BitwiseNot, Accuracy) // Requires cv::gpu::bitwise_not to reproduce the CPU result dst_gold exactly.
+{
+    if (mat.depth() == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // returns early, without running any check, for 64F depth on a device lacking NATIVE_DOUBLE
 
-//////////////////////////////////////////////////////////////////////////////
-// sum
+    PRINT_PARAM(devInfo); // presumably logs the test parameters - confirm against the macro definitions
+    PRINT_TYPE(type)
+    PRINT_PARAM(size);
+
+    cv::Mat dst;
+    
+    ASSERT_NO_THROW( // upload, GPU operation and download all run inside the no-throw check
+        cv::gpu::GpuMat dev_dst;
+
+        cv::gpu::bitwise_not(cv::gpu::GpuMat(mat), dev_dst);
+
+        dev_dst.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // tolerance argument is 0.0
+}
 
-struct CV_GpuSumTest: cvtest::BaseTest 
+INSTANTIATE_TEST_CASE_P(Arithm, BitwiseNot, testing::Combine( // one test instance per (device, type) combination, types supplied by all_types()
+                        testing::ValuesIn(devices()),
+                        testing::ValuesIn(all_types())));
+
+struct BitwiseOr : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for the cv::gpu::bitwise_or test; parameters are (device, matrix type).
 {
-    CV_GpuSumTest() {}
+    cv::gpu::DeviceInfo devInfo; // device taken from the first tuple element
+    int type; // matrix type taken from the second tuple element
+
+    cv::Size size; // random size, both dimensions drawn with rng.uniform(100, 200)
+    cv::Mat mat1; // first host operand
+    cv::Mat mat2; // second host operand
+
+    cv::Mat dst_gold; // reference result: mat1 | mat2 computed on the CPU
 
-    void run(int) 
+    virtual void SetUp() // selects the device, fills both operands with random bytes and computes the CPU reference
     {
-        Mat src;
-        Scalar a, b;
-        double max_err = 1e-5;
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
 
-        int typemax = CV_32F;
-        for (int type = CV_8U; type <= typemax; ++type)
+        mat1.create(size, type);
+        mat2.create(size, type);
+        
+        for (int i = 0; i < mat1.rows; ++i)
         {
-            //
-            // sum
-            //
-
-            gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 1), src);
-            a = sum(src);
-            b = sum(GpuMat(src));
-            if (abs(a[0] - b[0]) > src.size().area() * max_err)
-            {
-                ts->printf(cvtest::TS::CONSOLE, "1 cols: %d, rows: %d, expected: %f, actual: %f\n", src.cols, src.rows, a[0], b[0]);
-                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                return;
-            }
+            cv::Mat row1(1, mat1.cols * mat1.elemSize(), CV_8U, (void*)mat1.ptr(i)); // CV_8U header over row i's own memory, one column per byte of the row
+            rng.fill(row1, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255)); // fills the raw bytes uniformly (bounds 0 and 255), independent of the element type
 
-            gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 2), src);
-            a = sum(src);
-            b = sum(GpuMat(src));
-            if (abs(a[0] - b[0]) + abs(a[1] - b[1]) > src.size().area() * max_err)
-            {
-                ts->printf(cvtest::TS::CONSOLE, "2 cols: %d, rows: %d, expected: %f, actual: %f\n", src.cols, src.rows, a[1], b[1]);
-                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                return;
-            }
+            cv::Mat row2(1, mat2.cols * mat2.elemSize(), CV_8U, (void*)mat2.ptr(i)); // same byte-wise fill for the second operand
+            rng.fill(row2, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
+        }
 
-            gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 3), src);
-            a = sum(src);
-            b = sum(GpuMat(src));
-            if (abs(a[0] - b[0]) + abs(a[1] - b[1]) + abs(a[2] - b[2])> src.size().area() * max_err)
-            {
-                ts->printf(cvtest::TS::CONSOLE, "3 cols: %d, rows: %d, expected: %f, actual: %f\n", src.cols, src.rows, a[2], b[2]);
-                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                return;
-            }
+        dst_gold = mat1 | mat2;
+    }
+};
 
-            gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 4), src);
-            a = sum(src);
-            b = sum(GpuMat(src));
-            if (abs(a[0] - b[0]) + abs(a[1] - b[1]) + abs(a[2] - b[2]) + abs(a[3] - b[3])> src.size().area() * max_err)
-            {
-                ts->printf(cvtest::TS::CONSOLE, "4 cols: %d, rows: %d, expected: %f, actual: %f\n", src.cols, src.rows, a[3], b[3]);
-                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                return;
-            }
+TEST_P(BitwiseOr, Accuracy) // Requires cv::gpu::bitwise_or to reproduce the CPU result dst_gold exactly.
+{
+    if (mat1.depth() == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // returns early, without running any check, for 64F depth on a device lacking NATIVE_DOUBLE
 
-            gen(1 + rand() % 500, 1 + rand() % 500, type, src);
-            a = sum(src);
-            b = sum(GpuMat(src));
-            if (abs(a[0] - b[0]) > src.size().area() * max_err)
-            {
-                ts->printf(cvtest::TS::CONSOLE, "cols: %d, rows: %d, expected: %f, actual: %f\n", src.cols, src.rows, a[0], b[0]);
-                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                return;
-            }
+    PRINT_PARAM(devInfo); // presumably logs the test parameters - confirm against the macro definitions
+    PRINT_TYPE(type)
+    PRINT_PARAM(size);
 
-            //
-            // absSum
-            //
+    cv::Mat dst;
+    
+    ASSERT_NO_THROW( // upload, GPU operation and download all run inside the no-throw check
+        cv::gpu::GpuMat dev_dst;
 
-            gen(1 + rand() % 200, 1 + rand() % 200, CV_MAKETYPE(type, 1), src);
-            b = absSum(GpuMat(src));
-            a = norm(src, NORM_L1);
-            if (abs(a[0] - b[0]) > src.size().area() * max_err)
-            {
-                ts->printf(cvtest::TS::CONSOLE, "type: %d, cols: %d, rows: %d, expected: %f, actual: %f\n", type, src.cols, src.rows, a[0], b[0]);
-                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                return;
-            }
+        cv::gpu::bitwise_or(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), dev_dst);
 
-            //
-            // sqrSum
-            //
+        dev_dst.download(dst);
+    );
 
-            if (type != CV_8S)
-            {
-                gen(1 + rand() % 200, 1 + rand() % 200, CV_MAKETYPE(type, 1), src);
-                b = sqrSum(GpuMat(src));
-                Mat sqrsrc;
-                multiply(src, src, sqrsrc);
-                a = sum(sqrsrc);
-                if (abs(a[0] - b[0]) > src.size().area() * max_err)
-                {
-                    ts->printf(cvtest::TS::CONSOLE, "type: %d, cols: %d, rows: %d, expected: %f, actual: %f\n", type, src.cols, src.rows, a[0], b[0]);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return;
-                }
-                gen(1 + rand() % 200, 1 + rand() % 200, CV_MAKETYPE(type, 2), src);
-                b = sqrSum(GpuMat(src));
-                multiply(src, src, sqrsrc);
-                a = sum(sqrsrc);
-                if (abs(a[0] - b[0]) + abs(a[1] - b[1])> src.size().area() * max_err * 2)
-                {
-                    ts->printf(cvtest::TS::CONSOLE, "type: %d, cols: %d, rows: %d, expected: %f, actual: %f\n", type, src.cols, src.rows, a[0], b[0]);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return;
-                }
-                gen(1 + rand() % 200, 1 + rand() % 200, CV_MAKETYPE(type, 3), src);
-                b = sqrSum(GpuMat(src));
-                multiply(src, src, sqrsrc);
-                a = sum(sqrsrc);
-                if (abs(a[0] - b[0]) + abs(a[1] - b[1]) + abs(a[2] - b[2])> src.size().area() * max_err * 3)
-                {
-                    ts->printf(cvtest::TS::CONSOLE, "type: %d, cols: %d, rows: %d, expected: %f, actual: %f\n", type, src.cols, src.rows, a[0], b[0]);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return;
-                }
-                gen(1 + rand() % 200, 1 + rand() % 200, CV_MAKETYPE(type, 4), src);
-                b = sqrSum(GpuMat(src));
-                multiply(src, src, sqrsrc);
-                a = sum(sqrsrc);
-                if (abs(a[0] - b[0]) + abs(a[1] - b[1]) + abs(a[2] - b[2]) + abs(a[3] - b[3])> src.size().area() * max_err * 4)
-                {
-                    ts->printf(cvtest::TS::CONSOLE, "type: %d, cols: %d, rows: %d, expected: %f, actual: %f\n", type, src.cols, src.rows, a[0], b[0]);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return;
-                }
-            }
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // tolerance argument is 0.0
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, BitwiseOr, testing::Combine( // one test instance per (device, type) combination, types supplied by all_types()
+                        testing::ValuesIn(devices()),
+                        testing::ValuesIn(all_types())));
+
+struct BitwiseAnd : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for the cv::gpu::bitwise_and test; parameters are (device, matrix type).
+{
+    cv::gpu::DeviceInfo devInfo; // device taken from the first tuple element
+    int type; // matrix type taken from the second tuple element
+
+    cv::Size size; // random size, both dimensions drawn with rng.uniform(100, 200)
+    cv::Mat mat1; // first host operand
+    cv::Mat mat2; // second host operand
+
+    cv::Mat dst_gold; // reference result: mat1 & mat2 computed on the CPU
+
+    virtual void SetUp() // selects the device, fills both operands with random bytes and computes the CPU reference
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
+
+        mat1.create(size, type);
+        mat2.create(size, type);
+        
+        for (int i = 0; i < mat1.rows; ++i)
+        {
+            cv::Mat row1(1, mat1.cols * mat1.elemSize(), CV_8U, (void*)mat1.ptr(i)); // CV_8U header over row i's own memory, one column per byte of the row
+            rng.fill(row1, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255)); // fills the raw bytes uniformly (bounds 0 and 255), independent of the element type
+
+            cv::Mat row2(1, mat2.cols * mat2.elemSize(), CV_8U, (void*)mat2.ptr(i)); // same byte-wise fill for the second operand
+            rng.fill(row2, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
         }
+
+        dst_gold = mat1 & mat2;
     }
+};
+
+TEST_P(BitwiseAnd, Accuracy) // Requires cv::gpu::bitwise_and to reproduce the CPU result dst_gold exactly.
+{
+    if (mat1.depth() == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // returns early, without running any check, for 64F depth on a device lacking NATIVE_DOUBLE
+
+    PRINT_PARAM(devInfo); // presumably logs the test parameters - confirm against the macro definitions
+    PRINT_TYPE(type)
+    PRINT_PARAM(size);
+
+    cv::Mat dst;
+    
+    ASSERT_NO_THROW( // upload, GPU operation and download all run inside the no-throw check
+        cv::gpu::GpuMat dev_dst;
+
+        cv::gpu::bitwise_and(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), dev_dst);
+
+        dev_dst.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // tolerance argument is 0.0
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, BitwiseAnd, testing::Combine( // one test instance per (device, type) combination, types supplied by all_types()
+                        testing::ValuesIn(devices()),
+                        testing::ValuesIn(all_types())));
+
+struct BitwiseXor : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // Fixture for the cv::gpu::bitwise_xor test; parameters are (device, matrix type).
+{
+    cv::gpu::DeviceInfo devInfo; // device taken from the first tuple element
+    int type; // matrix type taken from the second tuple element
+
+    cv::Size size; // random size, both dimensions drawn with rng.uniform(100, 200)
+    cv::Mat mat1; // first host operand
+    cv::Mat mat2; // second host operand
+
+    cv::Mat dst_gold; // reference result: mat1 ^ mat2 computed on the CPU
 
-    void gen(int cols, int rows, int type, Mat& m)
+    virtual void SetUp() // selects the device, fills both operands with random bytes and computes the CPU reference
     {
-        m.create(rows, cols, type);
-        RNG rng;
-        rng.fill(m, RNG::UNIFORM, Scalar::all(0), Scalar::all(16));
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
 
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
+
+        mat1.create(size, type);
+        mat2.create(size, type);
+        
+        for (int i = 0; i < mat1.rows; ++i)
+        {
+            cv::Mat row1(1, mat1.cols * mat1.elemSize(), CV_8U, (void*)mat1.ptr(i)); // CV_8U header over row i's own memory, one column per byte of the row
+            rng.fill(row1, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255)); // fills the raw bytes uniformly (bounds 0 and 255), independent of the element type
+
+            cv::Mat row2(1, mat2.cols * mat2.elemSize(), CV_8U, (void*)mat2.ptr(i)); // same byte-wise fill for the second operand
+            rng.fill(row2, cv::RNG::UNIFORM, cv::Scalar(0), cv::Scalar(255));
+        }
+
+        dst_gold = mat1 ^ mat2;
     }
 };
 
-TEST(add, accuracy) { CV_GpuNppImageAddTest test; test.safe_run(); }
-TEST(subtract, accuracy) { CV_GpuNppImageSubtractTest test; test.safe_run(); }
-TEST(multiply, accuracy) { CV_GpuNppImageMultiplyTest test; test.safe_run(); }
-TEST(divide, accuracy) { CV_GpuNppImageDivideTest test; test.safe_run(); }
-TEST(transpose, accuracy) { CV_GpuNppImageTransposeTest test; test.safe_run(); }
-TEST(absdiff, accuracy) { CV_GpuNppImageAbsdiffTest test; test.safe_run(); }
-TEST(compare, accuracy) { CV_GpuNppImageCompareTest test; test.safe_run(); }
-TEST(meanStdDev, accuracy) { CV_GpuNppImageMeanStdDevTest test; test.safe_run(); }
-TEST(normDiff, accuracy) { CV_GpuNppImageNormTest test; test.safe_run(); }
-TEST(flip, accuracy) { CV_GpuNppImageFlipTest test; test.safe_run(); }
-TEST(LUT, accuracy) { CV_GpuNppImageLUTTest test; test.safe_run(); }
-TEST(exp, accuracy) { CV_GpuNppImageExpTest test; test.safe_run(); }
-TEST(log, accuracy) { CV_GpuNppImageLogTest test; test.safe_run(); }
-TEST(magnitude, accuracy) { CV_GpuNppImageMagnitudeTest test; test.safe_run(); }
-TEST(phase, accuracy) { CV_GpuNppImagePhaseTest test; test.safe_run(); }
-TEST(cartToPolar, accuracy) { CV_GpuNppImageCartToPolarTest test; test.safe_run(); }
-TEST(polarToCart, accuracy) { CV_GpuNppImagePolarToCartTest test; test.safe_run(); }
-TEST(minMax, accuracy) { CV_GpuMinMaxTest test; test.safe_run(); }
-TEST(minMaxLoc, accuracy) { CV_GpuMinMaxLocTest test; test.safe_run(); }
-TEST(countNonZero, accuracy) { CV_GpuCountNonZeroTest test; test.safe_run(); }
-TEST(sum, accuracy) { CV_GpuSumTest test; test.safe_run(); }
+TEST_P(BitwiseXor, Accuracy) // Requires cv::gpu::bitwise_xor to reproduce the CPU result dst_gold exactly.
+{
+    if (mat1.depth() == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // returns early, without running any check, for 64F depth on a device lacking NATIVE_DOUBLE
+
+    PRINT_PARAM(devInfo); // presumably logs the test parameters - confirm against the macro definitions
+    PRINT_TYPE(type)
+    PRINT_PARAM(size);
+
+    cv::Mat dst;
+    
+    ASSERT_NO_THROW( // upload, GPU operation and download all run inside the no-throw check
+        cv::gpu::GpuMat dev_dst;
+
+        cv::gpu::bitwise_xor(cv::gpu::GpuMat(mat1), cv::gpu::GpuMat(mat2), dev_dst);
+
+        dev_dst.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // tolerance argument is 0.0
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, BitwiseXor, testing::Combine( // one test instance per (device, type) combination, types supplied by all_types()
+                        testing::ValuesIn(devices()),
+                        testing::ValuesIn(all_types())));
+
+#endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_bitwise_oper.cpp b/modules/gpu/test/test_bitwise_oper.cpp
deleted file mode 100644
index 6b05053e13..0000000000
--- a/modules/gpu/test/test_bitwise_oper.cpp
+++ /dev/null
@@ -1,235 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include <iostream>
-#include <limits>
-#include "test_precomp.hpp"
-
-#define CHECK(pred, err) if (!(pred)) { \
-    ts->printf(cvtest::TS::CONSOLE, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \
-    ts->set_failed_test_info(err); \
-    return; }
-
-using namespace cv;
-using namespace std;
-
-struct CV_GpuBitwiseTest: public cvtest::BaseTest
-{
-    CV_GpuBitwiseTest() {}
-
-    void run(int)
-    {
-        int rows, cols;
-
-        bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && 
-                         gpu::DeviceInfo().supports(gpu::NATIVE_DOUBLE);
-        int depth_end = double_ok ? CV_64F : CV_32F;
-
-        for (int depth = CV_8U; depth <= depth_end; ++depth)
-            for (int cn = 1; cn <= 4; ++cn)
-                for (int attempt = 0; attempt < 3; ++attempt)
-                {
-                    rows = 1 + rand() % 100;
-                    cols = 1 + rand() % 100;
-                    test_bitwise_not(rows, cols, CV_MAKETYPE(depth, cn));
-                    test_bitwise_or(rows, cols, CV_MAKETYPE(depth, cn));
-                    test_bitwise_and(rows, cols, CV_MAKETYPE(depth, cn));
-                    test_bitwise_xor(rows, cols, CV_MAKETYPE(depth, cn));
-                }
-    }
-
-    void test_bitwise_not(int rows, int cols, int type)
-    {
-        Mat src(rows, cols, type);
-
-        RNG rng;
-        for (int i = 0; i < src.rows; ++i)
-        {
-            Mat row(1, src.cols * src.elemSize(), CV_8U, src.ptr(i));
-            rng.fill(row, RNG::UNIFORM, Scalar(0), Scalar(255));
-        }
-
-        Mat dst_gold = ~src;
-
-        gpu::GpuMat mask(src.size(), CV_8U);
-        mask.setTo(Scalar(1));
-
-        gpu::GpuMat dst;
-        gpu::bitwise_not(gpu::GpuMat(src), dst);
-
-        CHECK(dst_gold.size() == dst.size(), cvtest::TS::FAIL_INVALID_OUTPUT);
-        CHECK(dst_gold.type() == dst.type(), cvtest::TS::FAIL_INVALID_OUTPUT);        
-
-        Mat dsth(dst);
-        for (int i = 0; i < dst_gold.rows; ++i)       
-            CHECK(memcmp(dst_gold.ptr(i), dsth.ptr(i), dst_gold.cols * dst_gold.elemSize()) == 0, cvtest::TS::FAIL_INVALID_OUTPUT);
-
-        dst.setTo(Scalar::all(0));
-        gpu::bitwise_not(gpu::GpuMat(src), dst, mask);
-
-        CHECK(dst_gold.size() == dst.size(), cvtest::TS::FAIL_INVALID_OUTPUT);
-        CHECK(dst_gold.type() == dst.type(), cvtest::TS::FAIL_INVALID_OUTPUT);        
-
-        dsth = dst;
-        for (int i = 0; i < dst_gold.rows; ++i)       
-            CHECK(memcmp(dst_gold.ptr(i), dsth.ptr(i), dst_gold.cols * dst_gold.elemSize()) == 0, cvtest::TS::FAIL_INVALID_OUTPUT)
-    }
-
-    void test_bitwise_or(int rows, int cols, int type)
-    {
-        Mat src1(rows, cols, type);
-        Mat src2(rows, cols, type);
-
-        RNG rng;
-        for (int i = 0; i < src1.rows; ++i)
-        {
-            Mat row1(1, src1.cols * src1.elemSize(), CV_8U, src1.ptr(i));
-            rng.fill(row1, RNG::UNIFORM, Scalar(0), Scalar(255));
-            Mat row2(1, src2.cols * src2.elemSize(), CV_8U, src2.ptr(i));
-            rng.fill(row2, RNG::UNIFORM, Scalar(0), Scalar(255));
-        }
-
-        Mat dst_gold = src1 | src2;
-        gpu::GpuMat dst = gpu::GpuMat(src1) | gpu::GpuMat(src2);
-
-        CHECK(dst_gold.size() == dst.size(), cvtest::TS::FAIL_INVALID_OUTPUT);
-        CHECK(dst_gold.type() == dst.type(), cvtest::TS::FAIL_INVALID_OUTPUT);        
-        Mat dsth(dst);
-        for (int i = 0; i < dst_gold.rows; ++i)       
-            CHECK(memcmp(dst_gold.ptr(i), dsth.ptr(i), dst_gold.cols * dst_gold.elemSize()) == 0, cvtest::TS::FAIL_INVALID_OUTPUT)
-
-        Mat mask(src1.size(), CV_8U);
-        randu(mask, Scalar(0), Scalar(255));
-
-        Mat dst_gold2(dst_gold.size(), dst_gold.type()); dst_gold2.setTo(Scalar::all(0));
-        gpu::GpuMat dst2(dst.size(), dst.type()); dst2.setTo(Scalar::all(0));
-        bitwise_or(src1, src2, dst_gold2, mask);
-        gpu::bitwise_or(gpu::GpuMat(src1), gpu::GpuMat(src2), dst2, gpu::GpuMat(mask));
-
-        CHECK(dst_gold2.size() == dst2.size(), cvtest::TS::FAIL_INVALID_OUTPUT);
-        CHECK(dst_gold2.type() == dst2.type(), cvtest::TS::FAIL_INVALID_OUTPUT);        
-        dsth = dst2;
-        for (int i = 0; i < dst_gold.rows; ++i)       
-            CHECK(memcmp(dst_gold2.ptr(i), dsth.ptr(i), dst_gold2.cols * dst_gold2.elemSize()) == 0, cvtest::TS::FAIL_INVALID_OUTPUT)
-    }
-
-    void test_bitwise_and(int rows, int cols, int type)
-    {
-        Mat src1(rows, cols, type);
-        Mat src2(rows, cols, type);
-
-        RNG rng;
-        for (int i = 0; i < src1.rows; ++i)
-        {
-            Mat row1(1, src1.cols * src1.elemSize(), CV_8U, src1.ptr(i));
-            rng.fill(row1, RNG::UNIFORM, Scalar(0), Scalar(255));
-            Mat row2(1, src2.cols * src2.elemSize(), CV_8U, src2.ptr(i));
-            rng.fill(row2, RNG::UNIFORM, Scalar(0), Scalar(255));
-        }
-
-        Mat dst_gold = src1 & src2;
-
-        gpu::GpuMat dst = gpu::GpuMat(src1) & gpu::GpuMat(src2);
-
-        CHECK(dst_gold.size() == dst.size(), cvtest::TS::FAIL_INVALID_OUTPUT);
-        CHECK(dst_gold.type() == dst.type(), cvtest::TS::FAIL_INVALID_OUTPUT);        
-        Mat dsth(dst);
-        for (int i = 0; i < dst_gold.rows; ++i)       
-            CHECK(memcmp(dst_gold.ptr(i), dsth.ptr(i), dst_gold.cols * dst_gold.elemSize()) == 0, cvtest::TS::FAIL_INVALID_OUTPUT)
-
-
-        Mat mask(src1.size(), CV_8U);
-        randu(mask, Scalar(0), Scalar(255));
-
-        Mat dst_gold2(dst_gold.size(), dst_gold.type()); dst_gold2.setTo(Scalar::all(0));
-        gpu::GpuMat dst2(dst.size(), dst.type()); dst2.setTo(Scalar::all(0));
-        bitwise_and(src1, src2, dst_gold2, mask);
-        gpu::bitwise_and(gpu::GpuMat(src1), gpu::GpuMat(src2), dst2, gpu::GpuMat(mask));
-
-        CHECK(dst_gold2.size() == dst2.size(), cvtest::TS::FAIL_INVALID_OUTPUT);
-        CHECK(dst_gold2.type() == dst2.type(), cvtest::TS::FAIL_INVALID_OUTPUT);        
-        dsth = dst2;
-        for (int i = 0; i < dst_gold.rows; ++i)       
-            CHECK(memcmp(dst_gold2.ptr(i), dsth.ptr(i), dst_gold2.cols * dst_gold2.elemSize()) == 0, cvtest::TS::FAIL_INVALID_OUTPUT)
-    }
-
-    void test_bitwise_xor(int rows, int cols, int type)
-    {
-        Mat src1(rows, cols, type);
-        Mat src2(rows, cols, type);
-
-        RNG rng;
-        for (int i = 0; i < src1.rows; ++i)
-        {
-            Mat row1(1, src1.cols * src1.elemSize(), CV_8U, src1.ptr(i));
-            rng.fill(row1, RNG::UNIFORM, Scalar(0), Scalar(255));
-            Mat row2(1, src2.cols * src2.elemSize(), CV_8U, src2.ptr(i));
-            rng.fill(row2, RNG::UNIFORM, Scalar(0), Scalar(255));
-        }
-
-        Mat dst_gold = src1 ^ src2;
-
-        gpu::GpuMat dst = gpu::GpuMat(src1) ^ gpu::GpuMat(src2);
-
-        CHECK(dst_gold.size() == dst.size(), cvtest::TS::FAIL_INVALID_OUTPUT);
-        CHECK(dst_gold.type() == dst.type(), cvtest::TS::FAIL_INVALID_OUTPUT);        
-        Mat dsth(dst);
-        for (int i = 0; i < dst_gold.rows; ++i)       
-            CHECK(memcmp(dst_gold.ptr(i), dsth.ptr(i), dst_gold.cols * dst_gold.elemSize()) == 0, cvtest::TS::FAIL_INVALID_OUTPUT)
-
-
-        Mat mask(src1.size(), CV_8U);
-        randu(mask, Scalar(0), Scalar(255));
-
-        Mat dst_gold2(dst_gold.size(), dst_gold.type()); dst_gold2.setTo(Scalar::all(0));
-        gpu::GpuMat dst2(dst.size(), dst.type()); dst2.setTo(Scalar::all(0));
-        bitwise_xor(src1, src2, dst_gold2, mask);
-        gpu::bitwise_xor(gpu::GpuMat(src1), gpu::GpuMat(src2), dst2, gpu::GpuMat(mask));
-
-        CHECK(dst_gold2.size() == dst2.size(), cvtest::TS::FAIL_INVALID_OUTPUT);
-        CHECK(dst_gold2.type() == dst2.type(), cvtest::TS::FAIL_INVALID_OUTPUT);        
-        dsth = dst2;
-        for (int i = 0; i < dst_gold.rows; ++i)       
-            CHECK(memcmp(dst_gold2.ptr(i), dsth.ptr(i), dst_gold2.cols * dst_gold2.elemSize()) == 0, cvtest::TS::FAIL_INVALID_OUTPUT)
-    }
-};
-
-TEST(BitwiseOperations, accuracy) { CV_GpuBitwiseTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_blend.cpp b/modules/gpu/test/test_blend.cpp
deleted file mode 100644
index 947969593c..0000000000
--- a/modules/gpu/test/test_blend.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-using namespace std;
-using namespace cv;
-using namespace cv::gpu;
-
-TEST(blendLinear, accuracy_on_8U)
-{
-    RNG& rng = cvtest::TS::ptr()->get_rng();
-    Size size(200 + cvtest::randInt(rng) % 1000,
-              200 + cvtest::randInt(rng) % 1000);
-    for (int cn = 1; cn <= 4; ++cn)
-    {
-        Mat img1 = cvtest::randomMat(rng, size, CV_MAKE_TYPE(CV_8U, cn), 0, 255, false);
-        Mat img2 = cvtest::randomMat(rng, size, CV_MAKE_TYPE(CV_8U, cn), 0, 255, false);
-        Mat weights1 = cvtest::randomMat(rng, size, CV_32F, 0, 1, false);
-        Mat weights2 = cvtest::randomMat(rng, size, CV_32F, 0, 1, false);
-        Mat result_gold(size, CV_MAKE_TYPE(CV_8U, cn));
-        for (int y = 0; y < size.height; ++y)
-            for (int x = 0; x < size.width * cn; ++x)
-            {
-                float w1 = weights1.at<float>(y, x / cn);
-                float w2 = weights2.at<float>(y, x / cn);
-                result_gold.at<uchar>(y, x) = static_cast<uchar>(
-                    (img1.at<uchar>(y, x) * w1 + img2.at<uchar>(y, x) * w2) / (w1 + w2 + 1e-5f));
-            }
-        GpuMat d_result;
-        blendLinear(GpuMat(img1), GpuMat(img2), GpuMat(weights1), GpuMat(weights2), d_result);
-        ASSERT_LE(cvtest::norm(result_gold, Mat(d_result), NORM_INF), 1) 
-            << "rows=" << size.height << ", cols=" << size.width << ", cn=" << cn;
-    }
-}
-
-TEST(blendLinear, accuracy_on_32F)
-{
-    RNG& rng = cvtest::TS::ptr()->get_rng();
-    Size size(200 + cvtest::randInt(rng) % 1000,
-              200 + cvtest::randInt(rng) % 1000);
-    for (int cn = 1; cn <= 4; ++cn)
-    {
-        Mat img1 = cvtest::randomMat(rng, size, CV_MAKE_TYPE(CV_32F, cn), 0, 1, false);
-        Mat img2 = cvtest::randomMat(rng, size, CV_MAKE_TYPE(CV_32F, cn), 0, 1, false);
-        Mat weights1 = cvtest::randomMat(rng, size, CV_32F, 0, 1, false);
-        Mat weights2 = cvtest::randomMat(rng, size, CV_32F, 0, 1, false);
-        Mat result_gold(size, CV_MAKE_TYPE(CV_32F, cn));
-        for (int y = 0; y < size.height; ++y)
-            for (int x = 0; x < size.width * cn; ++x)
-            {
-                float w1 = weights1.at<float>(y, x / cn);
-                float w2 = weights2.at<float>(y, x / cn);
-                result_gold.at<float>(y, x) = 
-                    (img1.at<float>(y, x) * w1 + img2.at<float>(y, x) * w2) / (w1 + w2 + 1e-5f);
-            }
-        GpuMat d_result;
-        blendLinear(GpuMat(img1), GpuMat(img2), GpuMat(weights1), GpuMat(weights2), d_result);
-        ASSERT_LE(cvtest::norm(result_gold, Mat(d_result), NORM_INF), 1e-3)
-            << "rows=" << size.height << ", cols=" << size.width << ", cn=" << cn;
-    }
-}
diff --git a/modules/gpu/test/test_brute_force_matcher.cpp b/modules/gpu/test/test_brute_force_matcher.cpp
deleted file mode 100644
index 59af9f04f9..0000000000
--- a/modules/gpu/test/test_brute_force_matcher.cpp
+++ /dev/null
@@ -1,522 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-#include <algorithm>
-#include <iterator>
-
-using namespace cv;
-using namespace cv::gpu;
-using namespace std;
-
-class CV_GpuBruteForceMatcherTest : public cvtest::BaseTest
-{
-public:
-    CV_GpuBruteForceMatcherTest()
-    {
-    }
-
-protected:
-    virtual void run(int);
-    
-    void emptyDataTest();
-    void dataTest(int dim);
-    
-    void generateData(GpuMat& query, GpuMat& train, int dim);
-
-    void matchTest(const GpuMat& query, const GpuMat& train);
-    void knnMatchTest(const GpuMat& query, const GpuMat& train);
-    void radiusMatchTest(const GpuMat& query, const GpuMat& train);
-
-private:
-    BruteForceMatcher_GPU< L2<float> > dmatcher;
-
-    static const int queryDescCount = 300; // must be even number because we split train data in some cases in two
-    static const int countFactor = 4; // do not change it
-};
-
-void CV_GpuBruteForceMatcherTest::emptyDataTest()
-{
-    GpuMat queryDescriptors, trainDescriptors, mask;
-    vector<GpuMat> trainDescriptorCollection, masks;
-    vector<DMatch> matches;
-    vector< vector<DMatch> > vmatches;
-
-    try
-    {
-        dmatcher.match(queryDescriptors, trainDescriptors, matches, mask);
-    }
-    catch(...)
-    {
-        ts->printf( cvtest::TS::LOG, "match() on empty descriptors must not generate exception (1).\n" );
-        ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-    }
-
-    try
-    {
-        dmatcher.knnMatch(queryDescriptors, trainDescriptors, vmatches, 2, mask);
-    }
-    catch(...)
-    {
-        ts->printf( cvtest::TS::LOG, "knnMatch() on empty descriptors must not generate exception (1).\n" );
-        ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-    }
-
-    try
-    {
-        dmatcher.radiusMatch(queryDescriptors, trainDescriptors, vmatches, 10.f, mask);
-    }
-    catch(...)
-    {
-        ts->printf( cvtest::TS::LOG, "radiusMatch() on empty descriptors must not generate exception (1).\n" );
-        ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-    }
-
-    try
-    {
-        dmatcher.add(trainDescriptorCollection);
-    }
-    catch(...)
-    {
-        ts->printf( cvtest::TS::LOG, "add() on empty descriptors must not generate exception.\n" );
-        ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-    }
-
-    try
-    {
-        dmatcher.match(queryDescriptors, matches, masks);
-    }
-    catch(...)
-    {
-        ts->printf( cvtest::TS::LOG, "match() on empty descriptors must not generate exception (2).\n" );
-        ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-    }
-
-    try
-    {
-        dmatcher.knnMatch(queryDescriptors, vmatches, 2, masks);
-    }
-    catch(...)
-    {
-        ts->printf( cvtest::TS::LOG, "knnMatch() on empty descriptors must not generate exception (2).\n" );
-        ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-    }
-
-    try
-    {
-        dmatcher.radiusMatch( queryDescriptors, vmatches, 10.f, masks );
-    }
-    catch(...)
-    {
-        ts->printf( cvtest::TS::LOG, "radiusMatch() on empty descriptors must not generate exception (2).\n" );
-        ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-    }
-
-}
-
-void CV_GpuBruteForceMatcherTest::generateData( GpuMat& queryGPU, GpuMat& trainGPU, int dim )
-{
-    Mat query, train;
-    RNG& rng = ts->get_rng();
-
-    // Generate query descriptors randomly.
-    // Descriptor vector elements are integer values.
-    Mat buf( queryDescCount, dim, CV_32SC1 );
-    rng.fill( buf, RNG::UNIFORM, Scalar::all(0), Scalar(3) );
-    buf.convertTo( query, CV_32FC1 );
-
-    // Generate train decriptors as follows:
-    // copy each query descriptor to train set countFactor times
-    // and perturb some one element of the copied descriptors in
-    // in ascending order. General boundaries of the perturbation
-    // are (0.f, 1.f).
-    train.create( query.rows*countFactor, query.cols, CV_32FC1 );
-    float step = 1.f / countFactor;
-    for( int qIdx = 0; qIdx < query.rows; qIdx++ )
-    {
-        Mat queryDescriptor = query.row(qIdx);
-        for( int c = 0; c < countFactor; c++ )
-        {
-            int tIdx = qIdx * countFactor + c;
-            Mat trainDescriptor = train.row(tIdx);
-            queryDescriptor.copyTo( trainDescriptor );
-            int elem = rng(dim);
-            float diff = rng.uniform( step*c, step*(c+1) );
-            trainDescriptor.at<float>(0, elem) += diff;
-        }
-    }
-
-    queryGPU.upload(query);
-    trainGPU.upload(train);
-}
-
-void CV_GpuBruteForceMatcherTest::matchTest( const GpuMat& query, const GpuMat& train )
-{
-    dmatcher.clear();
-
-    // test const version of match()
-    {
-        vector<DMatch> matches;
-        dmatcher.match( query, train, matches );
-
-        if( (int)matches.size() != queryDescCount )
-        {
-            ts->printf(cvtest::TS::LOG, "Incorrect matches count while test match() function (1).\n");
-            ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-        }
-        else
-        {
-            int badCount = 0;
-            for( size_t i = 0; i < matches.size(); i++ )
-            {
-                DMatch match = matches[i];
-                if( (match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0) )
-                    badCount++;
-            }
-            if (badCount > 0)
-            {
-                ts->printf( cvtest::TS::LOG, "%f - too large bad matches part while test match() function (1).\n",
-                            (float)badCount/(float)queryDescCount );
-                ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-            }
-        }
-    }
-
-    // test version of match() with add()
-    {
-        vector<DMatch> matches;
-        // make add() twice to test such case
-        dmatcher.add( vector<GpuMat>(1,train.rowRange(0, train.rows/2)) );
-        dmatcher.add( vector<GpuMat>(1,train.rowRange(train.rows/2, train.rows)) );
-        // prepare masks (make first nearest match illegal)
-        vector<GpuMat> masks(2);
-        for(int mi = 0; mi < 2; mi++ )
-        {
-            masks[mi] = GpuMat(query.rows, train.rows/2, CV_8UC1, Scalar::all(1));
-            for( int di = 0; di < queryDescCount/2; di++ )
-                masks[mi].col(di*countFactor).setTo(Scalar::all(0));
-        }
-
-        dmatcher.match( query, matches, masks );
-
-        if( (int)matches.size() != queryDescCount )
-        {
-            ts->printf(cvtest::TS::LOG, "Incorrect matches count while test match() function (2).\n");
-            ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-        }
-        else
-        {
-            int badCount = 0;
-            for( size_t i = 0; i < matches.size(); i++ )
-            {
-                DMatch match = matches[i];
-                int shift = dmatcher.isMaskSupported() ? 1 : 0;
-                {
-                    if( i < queryDescCount/2 )
-                    {
-                        if( (match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor + shift) || (match.imgIdx != 0) )
-                            badCount++;
-                    }
-                    else
-                    {
-                        if( (match.queryIdx != (int)i) || (match.trainIdx != ((int)i-queryDescCount/2)*countFactor + shift) || (match.imgIdx != 1) )
-                            badCount++;
-                    }
-                }
-            }
-            if (badCount > 0)
-            {
-                ts->printf( cvtest::TS::LOG, "%f - too large bad matches part while test match() function (2).\n",
-                            (float)badCount/(float)queryDescCount );
-                ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
-            }
-        }
-    }
-}
-
-void CV_GpuBruteForceMatcherTest::knnMatchTest( const GpuMat& query, const GpuMat& train )
-{
-    dmatcher.clear();
-
-    // test const version of knnMatch()
-    {
-        const int knn = 3;
-
-        vector< vector<DMatch> > matches;
-        dmatcher.knnMatch( query, train, matches, knn );
-
-        if( (int)matches.size() != queryDescCount )
-        {
-            ts->printf(cvtest::TS::LOG, "Incorrect matches count while test knnMatch() function (1).\n");
-            ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-        }
-        else
-        {
-            int badCount = 0;
-            for( size_t i = 0; i < matches.size(); i++ )
-            {
-                if( (int)matches[i].size() != knn )
-                    badCount++;
-                else
-                {
-                    int localBadCount = 0;
-                    for( int k = 0; k < knn; k++ )
-                    {
-                        DMatch match = matches[i][k];
-                        if( (match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor+k) || (match.imgIdx != 0) )
-                            localBadCount++;
-                    }
-                    badCount += localBadCount > 0 ? 1 : 0;
-                }
-            }
-            if (badCount > 0)
-            {
-                ts->printf( cvtest::TS::LOG, "%f - too large bad matches part while test knnMatch() function (1).\n",
-                            (float)badCount/(float)queryDescCount );
-                ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-            }
-        }
-    }
-
-    // test version of knnMatch() with add()
-    {
-        const int knn = 2;
-        vector<vector<DMatch> > matches;
-        // make add() twice to test such case
-        dmatcher.add( vector<GpuMat>(1,train.rowRange(0, train.rows/2)) );
-        dmatcher.add( vector<GpuMat>(1,train.rowRange(train.rows/2, train.rows)) );
-        // prepare masks (make first nearest match illegal)
-        vector<GpuMat> masks(2);
-        for(int mi = 0; mi < 2; mi++ )
-        {
-            masks[mi] = GpuMat(query.rows, train.rows/2, CV_8UC1, Scalar::all(1));
-            for( int di = 0; di < queryDescCount/2; di++ )
-                masks[mi].col(di*countFactor).setTo(Scalar::all(0));
-        }
-
-        dmatcher.knnMatch( query, matches, knn, masks );
-
-        if( (int)matches.size() != queryDescCount )
-        {
-            ts->printf(cvtest::TS::LOG, "Incorrect matches count while test knnMatch() function (2).\n");
-            ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-        }
-        else
-        {
-            int badCount = 0;
-            int shift = dmatcher.isMaskSupported() ? 1 : 0;
-            for( size_t i = 0; i < matches.size(); i++ )
-            {
-                if( (int)matches[i].size() != knn )
-                    badCount++;
-                else
-                {
-                    int localBadCount = 0;
-                    for( int k = 0; k < knn; k++ )
-                    {
-                        DMatch match = matches[i][k];
-                        {
-                            if( i < queryDescCount/2 )
-                            {
-                                if( (match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor + k + shift) ||
-                                    (match.imgIdx != 0) )
-                                    localBadCount++;
-                            }
-                            else
-                            {
-                                if( (match.queryIdx != (int)i) || (match.trainIdx != ((int)i-queryDescCount/2)*countFactor + k + shift) ||
-                                    (match.imgIdx != 1) )
-                                    localBadCount++;
-                            }
-                        }
-                    }
-                    badCount += localBadCount > 0 ? 1 : 0;
-                }
-            }
-            if (badCount > 0)
-            {
-                ts->printf( cvtest::TS::LOG, "%f - too large bad matches part while test knnMatch() function (2).\n",
-                            (float)badCount/(float)queryDescCount );
-                ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
-            }
-        }
-    }
-}
-
-void CV_GpuBruteForceMatcherTest::radiusMatchTest( const GpuMat& query, const GpuMat& train )
-{
-    bool atomics_ok = TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS);
-    if (!atomics_ok)
-    {
-        ts->printf(cvtest::TS::CONSOLE, "\nCode and device atomics support is required for radiusMatch (CC >= 1.1)");
-        ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-        return;
-    }
-
-    dmatcher.clear();
-    // test const version of match()
-    {
-        const float radius = 1.f/countFactor;
-        vector< vector<DMatch> > matches;
-        dmatcher.radiusMatch( query, train, matches, radius );
-
-        if( (int)matches.size() != queryDescCount )
-        {
-            ts->printf(cvtest::TS::LOG, "Incorrect matches count while test radiusMatch() function (1).\n");
-            ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-        }
-        else
-        {
-            int badCount = 0;
-            for( size_t i = 0; i < matches.size(); i++ )
-            {
-                if( (int)matches[i].size() != 1 )
-                    badCount++;
-                else
-                {
-                    DMatch match = matches[i][0];
-                    if( (match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0) )
-                        badCount++;
-                }
-            }
-            if (badCount > 0)
-            {
-                ts->printf( cvtest::TS::LOG, "%f - too large bad matches part while test radiusMatch() function (1).\n",
-                            (float)badCount/(float)queryDescCount );
-                ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-            }
-        }
-    }
-
-    // test version of match() with add()
-    {
-        int n = 3;
-        const float radius = 1.f/countFactor * n;
-        vector< vector<DMatch> > matches;
-        // make add() twice to test such case
-        dmatcher.add( vector<GpuMat>(1,train.rowRange(0, train.rows/2)) );
-        dmatcher.add( vector<GpuMat>(1,train.rowRange(train.rows/2, train.rows)) );
-        // prepare masks (make first nearest match illegal)
-        vector<GpuMat> masks(2);
-        for(int mi = 0; mi < 2; mi++ )
-        {
-            masks[mi] = GpuMat(query.rows, train.rows/2, CV_8UC1, Scalar::all(1));
-            for( int di = 0; di < queryDescCount/2; di++ )
-                masks[mi].col(di*countFactor).setTo(Scalar::all(0));
-        }
-
-        dmatcher.radiusMatch( query, matches, radius, masks );
-
-        int curRes = cvtest::TS::OK;
-        if( (int)matches.size() != queryDescCount )
-        {
-            ts->printf(cvtest::TS::LOG, "Incorrect matches count while test radiusMatch() function (1).\n");
-            ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-        }
-
-        int badCount = 0;
-        int shift = dmatcher.isMaskSupported() ? 1 : 0;
-        int needMatchCount = dmatcher.isMaskSupported() ? n-1 : n;
-        for( size_t i = 0; i < matches.size(); i++ )
-        {
-            if( (int)matches[i].size() != needMatchCount )
-                badCount++;
-            else
-            {
-                int localBadCount = 0;
-                for( int k = 0; k < needMatchCount; k++ )
-                {
-                    DMatch match = matches[i][k];
-                    {
-                        if( i < queryDescCount/2 )
-                        {
-                            if( (match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor + k + shift) ||
-                                (match.imgIdx != 0) )
-                                localBadCount++;
-                        }
-                        else
-                        {
-                            if( (match.queryIdx != (int)i) || (match.trainIdx != ((int)i-queryDescCount/2)*countFactor + k + shift) ||
-                                (match.imgIdx != 1) )
-                                localBadCount++;
-                        }
-                    }
-                }
-                badCount += localBadCount > 0 ? 1 : 0;
-            }
-        }
-
-        if (badCount > 0)
-        {
-            curRes = cvtest::TS::FAIL_INVALID_OUTPUT;
-            ts->printf( cvtest::TS::LOG, "%f - too large bad matches part while test radiusMatch() function (2).\n",
-                        (float)badCount/(float)queryDescCount );
-            ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
-        }
-    }
-}
-
-void CV_GpuBruteForceMatcherTest::dataTest(int dim)
-{
-    GpuMat query, train;
-    generateData(query, train, dim);
-
-    matchTest(query, train);
-    knnMatchTest(query, train);
-    radiusMatchTest(query, train);
-
-    dmatcher.clear();
-}
-
-void CV_GpuBruteForceMatcherTest::run(int)
-{
-    emptyDataTest();
-
-    dataTest(50);
-    dataTest(64);
-    dataTest(100);
-    dataTest(128);
-    dataTest(200);
-    dataTest(256);
-    dataTest(300);
-}
-
-TEST(BruteForceMatcher, accuracy) { CV_GpuBruteForceMatcherTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_calib3d.cpp b/modules/gpu/test/test_calib3d.cpp
index 3fdcf47d83..45404b26f5 100644
--- a/modules/gpu/test/test_calib3d.cpp
+++ b/modules/gpu/test/test_calib3d.cpp
@@ -1,135 +1,363 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-using namespace cv;
-using namespace cv::gpu;
-using namespace cvtest;
-
-TEST(projectPoints, accuracy)
-{
-    RNG& rng = TS::ptr()->get_rng();
-    Mat src = randomMat(rng, Size(1000, 1), CV_32FC3, 0, 10, false);
-    Mat rvec = randomMat(rng, Size(3, 1), CV_32F, 0, 1, false);
-    Mat tvec = randomMat(rng, Size(3, 1), CV_32F, 0, 1, false);
-    Mat camera_mat = randomMat(rng, Size(3, 3), CV_32F, 0, 1, false);
-    camera_mat.at<float>(0, 1) = 0.f;
-    camera_mat.at<float>(1, 0) = 0.f;
-    camera_mat.at<float>(2, 0) = 0.f;
-    camera_mat.at<float>(2, 1) = 0.f;
-
-    vector<Point2f> dst;
-    projectPoints(src, rvec, tvec, camera_mat, Mat(1, 8, CV_32F, Scalar::all(0)), dst);
-
-    GpuMat d_dst;
-    projectPoints(GpuMat(src), rvec, tvec, camera_mat, Mat(), d_dst);
-
-    ASSERT_EQ(dst.size(), (size_t)d_dst.cols);
-    ASSERT_EQ(1, d_dst.rows);
-    ASSERT_EQ(CV_32FC2, d_dst.type());
-
-    Mat h_dst(d_dst);
-    for (size_t i = 0; i < dst.size(); ++i)
-    {
-        Point2f res_gold = dst[i];
-        Point2f res_actual = h_dst.at<Point2f>(0, i);
-        Point2f err = res_actual - res_gold;
-        ASSERT_LT(err.dot(err) / res_gold.dot(res_gold), 1e-3f);
-    }
-}
-
-
-TEST(transformPoints, accuracy)
-{
-    RNG& rng = TS::ptr()->get_rng();
-    Mat src = randomMat(rng, Size(1000, 1), CV_32FC3, 0, 10, false);
-    Mat rvec = randomMat(rng, Size(3, 1), CV_32F, 0, 1, false);
-    Mat tvec = randomMat(rng, Size(3, 1), CV_32F, 0, 1, false);
-
-    GpuMat d_dst;
-    transformPoints(GpuMat(src), rvec, tvec, d_dst);
-    ASSERT_TRUE(src.size() == d_dst.size());
-    ASSERT_EQ(src.type(), d_dst.type());
-
-    Mat h_dst(d_dst);
-    Mat rot;
-    Rodrigues(rvec, rot);
-    for (int i = 0; i < h_dst.cols; ++i)
-    {
-        Point3f p = src.at<Point3f>(0, i);
-        Point3f res_gold(
-                rot.at<float>(0, 0) * p.x + rot.at<float>(0, 1) * p.y + rot.at<float>(0, 2) * p.z + tvec.at<float>(0, 0),
-                rot.at<float>(1, 0) * p.x + rot.at<float>(1, 1) * p.y + rot.at<float>(1, 2) * p.z + tvec.at<float>(0, 1),
-                rot.at<float>(2, 0) * p.x + rot.at<float>(2, 1) * p.y + rot.at<float>(2, 2) * p.z + tvec.at<float>(0, 2));
-        Point3f res_actual = h_dst.at<Point3f>(0, i);
-        Point3f err = res_actual - res_gold;
-        ASSERT_LT(err.dot(err) / res_gold.dot(res_gold), 1e-3f);
-    }
-}
-
-
-TEST(solvePnPRansac, accuracy)
-{
-    RNG& rng = TS::ptr()->get_rng();
-
-    const int num_points = 5000;
-    Mat object = randomMat(rng, Size(num_points, 1), CV_32FC3, 0, 100, false);
-    Mat camera_mat = randomMat(rng, Size(3, 3), CV_32F, 0.5, 1, false);
-    camera_mat.at<float>(0, 1) = 0.f;
-    camera_mat.at<float>(1, 0) = 0.f;
-    camera_mat.at<float>(2, 0) = 0.f;
-    camera_mat.at<float>(2, 1) = 0.f;
-
-    Mat rvec_gold = randomMat(rng, Size(3, 1), CV_32F, 0, 1, false);
-    Mat tvec_gold = randomMat(rng, Size(3, 1), CV_32F, 0, 1, false);
-
-    vector<Point2f> image_vec;
-    projectPoints(object, rvec_gold, tvec_gold, camera_mat, Mat(1, 8, CV_32F, Scalar::all(0)), image_vec);
-    Mat image(1, image_vec.size(), CV_32FC2, &image_vec[0]);
-
-    Mat rvec, tvec;
-    vector<int> inliers;
-    gpu::solvePnPRansac(object, image, camera_mat, Mat(1, 8, CV_32F, Scalar::all(0)), rvec, tvec, false, 200, 2.f, 100, &inliers);
-
-    ASSERT_LE(norm(rvec - rvec_gold), 1e-3f);
-    ASSERT_LE(norm(tvec - tvec_gold), 1e-3f);
-}
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+struct StereoTest : testing::TestWithParam<cv::gpu::DeviceInfo> // common base fixture of the stereo tests, parameterized by GPU device
+{
+    static cv::Mat img_l;        // input image loaded from the "*-L.png" file by the derived fixture
+    static cv::Mat img_r;        // input image loaded from the "*-R.png" file by the derived fixture
+    static cv::Mat img_template; // expected result the computed output is compared against
+    // Drops the shared static images when the test case is torn down.
+    static void TearDownTestCase() 
+    {
+        img_l.release();
+        img_r.release();
+        img_template.release();
+    }
+    // Device this test instance runs on; copied from the test parameter in SetUp().
+    cv::gpu::DeviceInfo devInfo;
+    // Stores the test parameter and selects that device through cv::gpu::setDevice.
+    virtual void SetUp() 
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
+};
+
+cv::Mat StereoTest::img_l; // out-of-class definitions of StereoTest's static image members
+cv::Mat StereoTest::img_r;
+cv::Mat StereoTest::img_template;
+
+//////////////////////////////////////////////////////////////////////////
+// BlockMatching
+
+struct StereoBlockMatching : StereoTest // fixture for the StereoBM_GPU regression test
+{
+    static void SetUpTestCase() // loads both inputs and the expected result, all forced to grayscale
+    {
+        img_l = readImage("stereobm/aloe-L.png", CV_LOAD_IMAGE_GRAYSCALE);
+        img_r = readImage("stereobm/aloe-R.png", CV_LOAD_IMAGE_GRAYSCALE);
+        img_template = readImage("stereobm/aloe-disp.png", CV_LOAD_IMAGE_GRAYSCALE);
+    }
+};
+
+TEST_P(StereoBlockMatching, Regression) // StereoBM_GPU output must equal the stored result image exactly
+{
+    ASSERT_TRUE(!img_l.empty() && !img_r.empty() && !img_template.empty());
+
+    PRINT_PARAM(devInfo);
+
+    cv::gpu::GpuMat disp;
+    cv::gpu::StereoBM_GPU bm(0, 128, 19);
+    // Run the matcher on GPU copies of the two input images.
+    bm(cv::gpu::GpuMat(img_l), cv::gpu::GpuMat(img_r), disp);
+    // Bring the result to the template's type so the two images can be compared.
+    disp.convertTo(disp, img_template.type());
+    // NORM_INF is the largest absolute difference, so 0 means every pixel matches.
+    ASSERT_EQ(img_template.size(), disp.size());
+    double norm = cv::norm(img_template, (cv::Mat)disp, cv::NORM_INF);
+    ASSERT_EQ(0.0, norm);
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoBlockMatching, testing::ValuesIn(devices())); // one instance per entry of devices()
+
+//////////////////////////////////////////////////////////////////////////
+// BeliefPropagation
+
+struct StereoBeliefPropagation : StereoTest // fixture for the cv::gpu::StereoBeliefPropagation regression test
+{
+    static void SetUpTestCase() // inputs use readImage's default flags; only the expected result is forced to grayscale
+    {
+        img_l = readImage("stereobp/aloe-L.png");
+        img_r = readImage("stereobp/aloe-R.png");
+        img_template = readImage("stereobp/aloe-disp.png", CV_LOAD_IMAGE_GRAYSCALE);
+    }
+};
+
+TEST_P(StereoBeliefPropagation, Regression) // belief-propagation output must equal the stored result image exactly
+{
+    ASSERT_TRUE(!img_l.empty() && !img_r.empty() && !img_template.empty());
+
+    PRINT_PARAM(devInfo);
+
+    cv::gpu::GpuMat disp;
+    cv::gpu::StereoBeliefPropagation bpm(64, 8, 2, 25, 0.1f, 15, 1, CV_16S);
+    // Run the algorithm on GPU copies of the two input images.
+    bpm(cv::gpu::GpuMat(img_l), cv::gpu::GpuMat(img_r), disp);
+    // Bring the result to the template's type so the two images can be compared.
+    disp.convertTo(disp, img_template.type());
+    // NORM_INF is the largest absolute difference, so 0 means every pixel matches.
+    ASSERT_EQ(img_template.size(), disp.size());
+    double norm = cv::norm(img_template, (cv::Mat)disp, cv::NORM_INF);
+    ASSERT_EQ(0.0, norm);
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoBeliefPropagation, testing::ValuesIn(devices())); // one instance per entry of devices()
+
+//////////////////////////////////////////////////////////////////////////
+// ConstantSpaceBP
+
+struct StereoConstantSpaceBP : StereoTest // fixture for the cv::gpu::StereoConstantSpaceBP regression test
+{
+    static void SetUpTestCase() 
+    {
+        img_l = readImage("csstereobp/aloe-L.png");
+        img_r = readImage("csstereobp/aloe-R.png");
+    }
+    // The expected image depends on the device, so it is (re)loaded for every test instead of once per test case.
+    virtual void SetUp() 
+    {
+        StereoTest::SetUp();
+        // Devices without FEATURE_SET_COMPUTE_20 are checked against the separate "_CC1X" reference image.
+        if (supportFeature(GetParam(), cv::gpu::FEATURE_SET_COMPUTE_20))
+            img_template = readImage("csstereobp/aloe-disp.png", CV_LOAD_IMAGE_GRAYSCALE);
+        else
+            img_template = readImage("csstereobp/aloe-disp_CC1X.png", CV_LOAD_IMAGE_GRAYSCALE);
+    }
+};
+
+TEST_P(StereoConstantSpaceBP, Regression) // constant-space BP output must equal the device-specific result image exactly
+{
+    ASSERT_TRUE(!img_l.empty() && !img_r.empty() && !img_template.empty());
+
+    PRINT_PARAM(devInfo);
+
+    cv::gpu::GpuMat disp;
+    cv::gpu::StereoConstantSpaceBP bpm(128, 16, 4, 4);
+    // Run the algorithm on GPU copies of the two input images.
+    bpm(cv::gpu::GpuMat(img_l), cv::gpu::GpuMat(img_r), disp);
+    // Bring the result to the template's type so the two images can be compared.
+    disp.convertTo(disp, img_template.type());
+    // NORM_INF is the largest absolute difference, so 0 means every pixel matches.
+    ASSERT_EQ(img_template.size(), disp.size());
+    double norm = cv::norm(img_template, (cv::Mat)disp, cv::NORM_INF);
+    ASSERT_EQ(0.0, norm);
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoConstantSpaceBP, testing::ValuesIn(devices())); // one instance per entry of devices()
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// projectPoints
+
+struct ProjectPoints : testing::TestWithParam<cv::gpu::DeviceInfo> // fixture for the cv::gpu::projectPoints accuracy test
+{
+    cv::gpu::DeviceInfo devInfo;
+    // Randomly generated inputs, filled in by SetUp().
+    cv::Mat src;
+    cv::Mat rvec;
+    cv::Mat tvec;
+    cv::Mat camera_mat;
+    // Reference projection produced by cv::projectPoints in SetUp().
+    std::vector<cv::Point2f> dst_gold;
+    // Selects the device under test, generates random inputs and computes the reference result.
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        src = cvtest::randomMat(rng, cv::Size(1000, 1), CV_32FC3, 0, 10, false);
+        rvec = cvtest::randomMat(rng, cv::Size(3, 1), CV_32F, 0, 1, false);
+        tvec = cvtest::randomMat(rng, cv::Size(3, 1), CV_32F, 0, 1, false);
+        camera_mat = cvtest::randomMat(rng, cv::Size(3, 3), CV_32F, 0, 1, false); // entries (0,1), (1,0), (2,0), (2,1) are zeroed below
+        camera_mat.at<float>(0, 1) = 0.f;
+        camera_mat.at<float>(1, 0) = 0.f;
+        camera_mat.at<float>(2, 0) = 0.f;
+        camera_mat.at<float>(2, 1) = 0.f;
+        // Reference result; the fifth argument is an all-zero 1x8 coefficient vector.
+        cv::projectPoints(src, rvec, tvec, camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), dst_gold);
+    }
+};
+
+TEST_P(ProjectPoints, Accuracy) // GPU projectPoints must agree with the cv::projectPoints reference within a relative tolerance
+{
+    PRINT_PARAM(devInfo);
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(   
+        cv::gpu::GpuMat d_dst;
+
+        cv::gpu::projectPoints(cv::gpu::GpuMat(src), rvec, tvec, camera_mat, cv::Mat(), d_dst);
+
+        d_dst.download(dst);
+    );
+    // dst.cols is an int: cast it so the comparison with size() is not a signed/unsigned mix.
+    ASSERT_EQ(dst_gold.size(), static_cast<size_t>(dst.cols));
+    ASSERT_EQ(1, dst.rows);
+    ASSERT_EQ(CV_32FC2, dst.type());
+
+    for (size_t i = 0; i < dst_gold.size(); ++i)
+    {
+        cv::Point2f res_gold = dst_gold[i];
+        cv::Point2f res_actual = dst.at<cv::Point2f>(0, static_cast<int>(i)); // Mat::at takes int indices
+        cv::Point2f err = res_actual - res_gold;
+        // Squared error relative to the squared magnitude of the reference point.
+        ASSERT_LE(err.dot(err) / res_gold.dot(res_gold), 1e-3f);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, testing::ValuesIn(devices())); // one instance per entry of devices()
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// transformPoints
+
+struct TransformPoints : testing::TestWithParam<cv::gpu::DeviceInfo> // fixture for the cv::gpu::transformPoints accuracy test
+{
+    cv::gpu::DeviceInfo devInfo;
+    // Random inputs generated in SetUp(); rot is the cv::Rodrigues result for rvec.
+    cv::Mat src;
+    cv::Mat rvec;
+    cv::Mat tvec;
+    cv::Mat rot;
+    // Selects the device under test, generates random inputs and derives rot from rvec.
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        src = cvtest::randomMat(rng, cv::Size(1000, 1), CV_32FC3, 0, 10, false);
+        rvec = cvtest::randomMat(rng, cv::Size(3, 1), CV_32F, 0, 1, false);
+        tvec = cvtest::randomMat(rng, cv::Size(3, 1), CV_32F, 0, 1, false);
+        // rot is used by the test body to compute the expected points.
+        cv::Rodrigues(rvec, rot);
+    }
+};
+
+TEST_P(TransformPoints, Accuracy) // GPU transformPoints must match rot * p + tvec within a relative tolerance
+{
+    PRINT_PARAM(devInfo);
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat d_dst;
+
+        cv::gpu::transformPoints(cv::gpu::GpuMat(src), rvec, tvec, d_dst);
+
+        d_dst.download(dst);
+    );
+    // The output must have the same size and type as the input.
+    ASSERT_EQ(src.size(), dst.size());
+    ASSERT_EQ(src.type(), dst.type());
+    // Expected point: multiply each source point by rot, then add the matching tvec component.
+    for (int i = 0; i < dst.cols; ++i)
+    {
+        cv::Point3f p = src.at<cv::Point3f>(0, i);
+        cv::Point3f res_gold(
+                rot.at<float>(0, 0) * p.x + rot.at<float>(0, 1) * p.y + rot.at<float>(0, 2) * p.z + tvec.at<float>(0, 0),
+                rot.at<float>(1, 0) * p.x + rot.at<float>(1, 1) * p.y + rot.at<float>(1, 2) * p.z + tvec.at<float>(0, 1),
+                rot.at<float>(2, 0) * p.x + rot.at<float>(2, 1) * p.y + rot.at<float>(2, 2) * p.z + tvec.at<float>(0, 2));
+        cv::Point3f res_actual = dst.at<cv::Point3f>(0, i);
+        cv::Point3f err = res_actual - res_gold;
+        // Squared error relative to the squared magnitude of the expected point.
+        ASSERT_LE(err.dot(err) / res_gold.dot(res_gold), 1e-3f);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, TransformPoints, testing::ValuesIn(devices())); // one instance per entry of devices()
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// solvePnPRansac
+
+struct SolvePnPRansac : testing::TestWithParam<cv::gpu::DeviceInfo> // fixture for the cv::gpu::solvePnPRansac accuracy test
+{
+    static const int num_points = 5000; // number of random object points generated in SetUp()
+
+    cv::gpu::DeviceInfo devInfo;
+    // Solver inputs: random object points, camera matrix and the points' projection.
+    cv::Mat object;
+    cv::Mat camera_mat;
+    std::vector<cv::Point2f> image_vec;
+    // Pose used to produce image_vec; the test expects the solver to recover it.
+    cv::Mat rvec_gold;
+    cv::Mat tvec_gold;
+    // Selects the device under test and builds a synthetic problem with a known pose.
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        object = cvtest::randomMat(rng, cv::Size(num_points, 1), CV_32FC3, 0, 100, false);
+        camera_mat = cvtest::randomMat(rng, cv::Size(3, 3), CV_32F, 0.5, 1, false); // entries (0,1), (1,0), (2,0), (2,1) are zeroed below
+        camera_mat.at<float>(0, 1) = 0.f;
+        camera_mat.at<float>(1, 0) = 0.f;
+        camera_mat.at<float>(2, 0) = 0.f;
+        camera_mat.at<float>(2, 1) = 0.f;
+
+        rvec_gold = cvtest::randomMat(rng, cv::Size(3, 1), CV_32F, 0, 1, false);
+        tvec_gold = cvtest::randomMat(rng, cv::Size(3, 1), CV_32F, 0, 1, false);
+        // Project the object points with the known pose; the fifth argument is an all-zero 1x8 coefficient vector.
+        cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), image_vec);
+    }
+};
+
+TEST_P(SolvePnPRansac, Accuracy) // the solver must recover rvec_gold / tvec_gold to within 1e-3 (cv::norm of the difference)
+{
+    PRINT_PARAM(devInfo);
+
+    cv::Mat rvec, tvec;
+    std::vector<int> inliers;
+    // image_vec is passed as a 1 x N CV_32FC2 matrix built over the vector's own storage.
+    ASSERT_NO_THROW(
+        cv::gpu::solvePnPRansac(object, cv::Mat(1, image_vec.size(), CV_32FC2, &image_vec[0]), camera_mat, 
+                                cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), rvec, tvec, false, 200, 2.f, 100, &inliers);
+    );
+    // Compare the estimated pose with the pose that generated image_vec.
+    ASSERT_LE(cv::norm(rvec - rvec_gold), 1e-3f);
+    ASSERT_LE(cv::norm(tvec - tvec_gold), 1e-3f);
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, testing::ValuesIn(devices())); // one instance per entry of devices()
+
+#endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_dft_routines.cpp b/modules/gpu/test/test_dft_routines.cpp
deleted file mode 100644
index f346bef428..0000000000
--- a/modules/gpu/test/test_dft_routines.cpp
+++ /dev/null
@@ -1,399 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-using namespace cv;
-using namespace cv::gpu;
-using namespace std;
-
-struct CV_GpuMulSpectrumsTest: cvtest::BaseTest
-{
-    CV_GpuMulSpectrumsTest() {}
-
-    void run(int)
-    {
-        test(0);
-        testConj(0);
-        testScaled(0);
-        testScaledConj(0);
-        test(DFT_ROWS);
-        testConj(DFT_ROWS);
-        testScaled(DFT_ROWS);
-        testScaledConj(DFT_ROWS);
-    }
-
-    void gen(int cols, int rows, Mat& mat)
-    {
-        RNG rng;
-        mat.create(rows, cols, CV_32FC2);
-        rng.fill(mat, RNG::UNIFORM, Scalar::all(0.f), Scalar::all(10.f));
-    }
-
-    bool cmp(const Mat& gold, const Mat& mine, float max_err=1e-3f)
-    {
-        if (gold.size() != mine.size())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad sizes: gold: %d d%, mine: %d %d\n", gold.cols, gold.rows, mine.cols, mine.rows);
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return false;
-        }
-        if (gold.type() != mine.type())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad types: gold=%d, mine=%d\n", gold.type(), mine.type());
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return false;
-        }
-        for (int i = 0; i < gold.rows; ++i)
-        {
-            for (int j = 0; j < gold.cols * 2; ++j)
-            {
-                float gold_ = gold.at<float>(i, j);
-                float mine_ = mine.at<float>(i, j);
-                if (fabs(gold_ - mine_) > max_err)
-                {
-                    ts->printf(cvtest::TS::CONSOLE, "bad values at %d %d: gold=%f, mine=%f\n", j, i, gold_, mine_);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return false;
-                }
-            }
-        }
-        return true;
-    }
-
-    bool cmpScaled(const Mat& gold, const Mat& mine, float scale, float max_err=1e-3f)
-    {
-        if (gold.size() != mine.size())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad sizes: gold: %d d%, mine: %d %d\n", gold.cols, gold.rows, mine.cols, mine.rows);
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return false;
-        }
-        if (gold.type() != mine.type())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad types: gold=%d, mine=%d\n", gold.type(), mine.type());
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return false;
-        }
-        for (int i = 0; i < gold.rows; ++i)
-        {
-            for (int j = 0; j < gold.cols * 2; ++j)
-            {
-                float gold_ = gold.at<float>(i, j) * scale;
-                float mine_ = mine.at<float>(i, j);
-                if (fabs(gold_ - mine_) > max_err)
-                {
-                    ts->printf(cvtest::TS::CONSOLE, "bad values at %d %d: gold=%f, mine=%f\n", j, i, gold_, mine_);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return false;
-                }
-            }
-        }
-        return true;
-    }
-
-    void test(int flags)
-    {
-        int cols = 1 + rand() % 100, rows = 1 + rand() % 1000;
-
-        Mat a, b;
-        gen(cols, rows, a);
-        gen(cols, rows, b);
-
-        Mat c_gold;
-        mulSpectrums(a, b, c_gold, flags, false);
-
-        GpuMat d_c;
-        mulSpectrums(GpuMat(a), GpuMat(b), d_c, flags, false);
-
-        if (!cmp(c_gold, Mat(d_c)))
-            ts->printf(cvtest::TS::CONSOLE, "test failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags);
-    }
-
-    void testConj(int flags)
-    {
-        int cols = 1 + rand() % 100, rows = 1 + rand() % 1000;
-
-        Mat a, b;
-        gen(cols, rows, a);
-        gen(cols, rows, b);
-
-        Mat c_gold;
-        mulSpectrums(a, b, c_gold, flags, true);
-
-        GpuMat d_c;
-        mulSpectrums(GpuMat(a), GpuMat(b), d_c, flags, true);
-
-        if (!cmp(c_gold, Mat(d_c)))
-            ts->printf(cvtest::TS::CONSOLE, "testConj failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags);
-    }
-
-    void testScaled(int flags)
-    {
-        int cols = 1 + rand() % 100, rows = 1 + rand() % 1000;
-
-        Mat a, b;
-        gen(cols, rows, a);
-        gen(cols, rows, b);
-        float scale = 1.f / a.size().area();
-
-        Mat c_gold;
-        mulSpectrums(a, b, c_gold, flags, false);
-
-        GpuMat d_c;
-        mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, flags, scale, false);
-
-        if (!cmpScaled(c_gold, Mat(d_c), scale))
-            ts->printf(cvtest::TS::CONSOLE, "testScaled failed: cols=%d, rows=%d, flags=%d\n", cols, rows, flags);
-    }
-
-    void testScaledConj(int flags)
-    {
-        int cols = 1 + rand() % 100, rows = 1 + rand() % 1000;
-
-        Mat a, b;
-        gen(cols, rows, a);
-        gen(cols, rows, b);
-        float scale = 1.f / a.size().area();
-
-        Mat c_gold;
-        mulSpectrums(a, b, c_gold, flags, true);
-
-        GpuMat d_c;
-        mulAndScaleSpectrums(GpuMat(a), GpuMat(b), d_c, flags, scale, true);
-
-        if (!cmpScaled(c_gold, Mat(d_c), scale))
-            ts->printf(cvtest::TS::CONSOLE, "testScaledConj failed: cols=%d, rows=%d, flags=%D\n", cols, rows, flags);
-    }
-} CV_GpuMulSpectrumsTest_inst;
-
-
-struct CV_GpuDftTest: cvtest::BaseTest
-{
-    CV_GpuDftTest() {}
-
-    void run(int)
-    {
-        srand(0);
-        int cols = 2 + rand() % 100, rows = 2 + rand() % 100;
-
-        for (int i = 0; i < 2; ++i)
-        {
-            bool inplace = i != 0;
-            testC2C("no flags", cols, rows, 0, inplace);
-            testC2C("no flags 0 1", cols, rows + 1, 0, inplace);
-            testC2C("no flags 1 0", cols, rows + 1, 0, inplace);
-            testC2C("no flags 1 1", cols + 1, rows, 0, inplace);
-            testC2C("DFT_INVERSE", cols, rows, DFT_INVERSE, inplace);
-            testC2C("DFT_ROWS", cols, rows, DFT_ROWS, inplace);
-            testC2C("single col", 1, rows, 0, inplace);
-            testC2C("single row", cols, 1, 0, inplace);
-            testC2C("single col inversed", 1, rows, DFT_INVERSE, inplace);
-            testC2C("single row inversed", cols, 1, DFT_INVERSE, inplace);
-            testC2C("single row DFT_ROWS", cols, 1, DFT_ROWS, inplace);
-            testC2C("size 1 2", 1, 2, 0, inplace);
-            testC2C("size 2 1", 2, 1, 0, inplace);
-        }
-
-        testR2CThenC2R("sanity", cols, rows);
-        testR2CThenC2R("sanity 0 1", cols, rows + 1);
-        testR2CThenC2R("sanity 1 0", cols + 1, rows);
-        testR2CThenC2R("sanity 1 1", cols + 1, rows + 1);
-        testR2CThenC2R("single col", 1, rows);
-        testR2CThenC2R("single col 1", 1, rows + 1);
-        testR2CThenC2R("single row", cols, 1);
-        testR2CThenC2R("single row 1", cols + 1, 1);
-
-        testR2CThenC2R("sanity", cols, rows, true);
-        testR2CThenC2R("sanity 0 1", cols, rows + 1, true);
-        testR2CThenC2R("sanity 1 0", cols + 1, rows, true);
-        testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, true);
-        testR2CThenC2R("single row", cols, 1, true);
-        testR2CThenC2R("single row 1", cols + 1, 1, true);
-}
-
-    void gen(int cols, int rows, int cn, Mat& mat)
-    {
-        RNG rng(1);
-        mat.create(rows, cols, CV_MAKETYPE(CV_32F, cn));
-        rng.fill(mat, RNG::UNIFORM, Scalar::all(0.f), Scalar::all(10.f));
-    }
-
-    bool cmp(const Mat& gold, const Mat& mine, float max_err=1e-3f)
-    {
-        if (gold.size() != mine.size())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad sizes: gold: %d %d, mine: %d %d\n", gold.cols, gold.rows, mine.cols, mine.rows);
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return false;
-        }
-        if (gold.depth() != mine.depth())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad depth: gold=%d, mine=%d\n", gold.depth(), mine.depth());
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return false;
-        }
-        if (gold.channels() != mine.channels())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad channel count: gold=%d, mine=%d\n", gold.channels(), mine.channels());
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return false;
-        }
-        for (int i = 0; i < gold.rows; ++i)
-        {
-            for (int j = 0; j < gold.cols * gold.channels(); ++j)
-            {
-                float gold_ = gold.at<float>(i, j);
-                float mine_ = mine.at<float>(i, j);
-                if (fabs(gold_ - mine_) > max_err)
-                {
-                    ts->printf(cvtest::TS::CONSOLE, "bad values at %d %d: gold=%f, mine=%f\n", j / gold.channels(), i, gold_, mine_);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return false;
-                }
-            }
-        }
-        return true;
-    }
-
-    void testC2C(const std::string& hint, int cols, int rows, int flags, bool inplace=false)
-    {
-        Mat a;
-        gen(cols, rows, 2, a);
-
-        Mat b_gold;
-        dft(a, b_gold, flags);
-
-        GpuMat d_b;
-        GpuMat d_b_data;
-        if (inplace)
-        {
-            d_b_data.create(1, a.size().area(), CV_32FC2);
-            d_b = GpuMat(a.rows, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
-        }
-
-        dft(GpuMat(a), d_b, Size(cols, rows), flags);
-
-        bool ok = true;
-        if (ok && inplace && d_b.ptr() != d_b_data.ptr())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "unnecessary reallocation was done\n");
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            ok = false;
-        }
-        if (ok && d_b.depth() != CV_32F)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad depth: %d\n", d_b.depth());
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            ok = false;
-        }
-        if (ok && d_b.channels() != 2)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad channel count: %d\n", d_b.channels());
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            ok = false;
-        }
-        if (ok) ok = cmp(b_gold, Mat(d_b), rows * cols * 1e-4f);
-        if (!ok) 
-            ts->printf(cvtest::TS::CONSOLE, "testC2C failed: hint=%s, cols=%d, rows=%d, flags=%d, inplace=%d\n", 
-                       hint.c_str(), cols, rows, flags, inplace);
-    }
-
-    void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace=false)
-    {
-        Mat a;
-        gen(cols, rows, 1, a);
-
-        bool ok = true;
-
-        GpuMat d_b, d_c;
-        GpuMat d_b_data, d_c_data;
-        if (inplace)
-        {
-            if (a.cols == 1)
-            {
-                d_b_data.create(1, (a.rows / 2 + 1) * a.cols, CV_32FC2);
-                d_b = GpuMat(a.rows / 2 + 1, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
-            }
-            else
-            {
-                d_b_data.create(1, a.rows * (a.cols / 2 + 1), CV_32FC2);
-                d_b = GpuMat(a.rows, a.cols / 2 + 1, CV_32FC2, d_b_data.ptr(), (a.cols / 2 + 1) * d_b_data.elemSize());
-            }
-            d_c_data.create(1, a.size().area(), CV_32F);
-            d_c = GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
-        }
-
-        dft(GpuMat(a), d_b, Size(cols, rows), 0);
-        dft(d_b, d_c, Size(cols, rows), DFT_REAL_OUTPUT | DFT_SCALE);
-
-        if (ok && inplace && d_b.ptr() != d_b_data.ptr())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "unnecessary reallocation was done for b\n");
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            ok = false;
-        }
-        if (ok && inplace && d_c.ptr() != d_c_data.ptr())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "unnecessary reallocation was done for c\n");
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            ok = false;
-        }
-        if (ok && d_c.depth() != CV_32F)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad depth: %d\n", d_c.depth());
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            ok = false;
-        }
-        if (ok && d_c.channels() != 1)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad channel count: %d\n", d_c.channels());
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            ok = false;
-        }
-        if (ok) ok = cmp(a, Mat(d_c), rows * cols * 1e-5f);
-        if (!ok) 
-            ts->printf(cvtest::TS::CONSOLE, "testR2CThenC2R failed: hint=%s, cols=%d, rows=%d, inplace=%d\n", 
-                       hint.c_str(), cols, rows, inplace);
-    }
-};
-
-TEST(dft, accuracy) { CV_GpuDftTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_features2d.cpp b/modules/gpu/test/test_features2d.cpp
index 12f71965c9..4b0c699391 100644
--- a/modules/gpu/test/test_features2d.cpp
+++ b/modules/gpu/test/test_features2d.cpp
@@ -40,156 +40,1295 @@
 //M*/
 
 #include "test_precomp.hpp"
-#include <string>
 
-using namespace cv;
-using namespace cv::gpu;
-using namespace std;
+#ifdef HAVE_CUDA
 
-const string FEATURES2D_DIR = "features2d";
-const string IMAGE_FILENAME = "aloe.png";
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// SURF
 
-class CV_GPU_SURFTest : public cvtest::BaseTest
+struct SURF : testing::TestWithParam<cv::gpu::DeviceInfo>
 {
-public:
-    CV_GPU_SURFTest()
+    static cv::Mat image;
+    static cv::Mat mask;
+    static std::vector<cv::KeyPoint> keypoints_gold;
+    static std::vector<float> descriptors_gold;
+
+    static void SetUpTestCase() 
+    {
+        image = readImage("features2d/aloe.png", CV_LOAD_IMAGE_GRAYSCALE);        
+        
+        mask = cv::Mat(image.size(), CV_8UC1, cv::Scalar::all(1));
+        mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
+                
+        cv::SURF fdetector_gold; fdetector_gold.extended = false;
+        fdetector_gold(image, mask, keypoints_gold, descriptors_gold);
+    }
+
+    static void TearDownTestCase() 
     {
+        image.release();
+        mask.release();
+        keypoints_gold.clear();
+        descriptors_gold.clear();
     }
 
-protected:
-    bool isSimilarKeypoints(const KeyPoint& p1, const KeyPoint& p2);
-    int getValidCount(const vector<KeyPoint>& keypoints1, const vector<KeyPoint>& keypoints2, const vector<DMatch>& matches);
-    void compareKeypointSets(const vector<KeyPoint>& validKeypoints, const vector<KeyPoint>& calcKeypoints,
-                             const Mat& validDescriptors, const Mat& calcDescriptors);
+    cv::gpu::DeviceInfo devInfo;
+    
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
 
-    void emptyDataTest();
-    void accuracyTest();
+    bool isSimilarKeypoints(const cv::KeyPoint& p1, const cv::KeyPoint& p2)
+    {   // true when position, size, angle and response agree within the tolerances below and octave/class_id are equal
+        const float maxPtDif = 1.f;
+        const float maxSizeDif = 1.f;
+        const float maxAngleDif = 2.f;
+        const float maxResponseDif = 0.1f;
 
-    virtual void run(int);
+        float dist = (float)cv::norm(p1.pt - p2.pt);
+        return (dist < maxPtDif &&
+                fabs(p1.size - p2.size) < maxSizeDif &&
+                fabs(p1.angle - p2.angle) < maxAngleDif &&       // fabs, not abs: unqualified abs may bind to ::abs(int) and truncate
+                fabs(p1.response - p2.response) < maxResponseDif && // (a truncated difference would defeat the 0.1 tolerance)
+                p1.octave == p2.octave &&
+                p1.class_id == p2.class_id );
+    }
 };
 
-void CV_GPU_SURFTest::emptyDataTest()
+cv::Mat SURF::image;
+cv::Mat SURF::mask;
+std::vector<cv::KeyPoint> SURF::keypoints_gold;
+std::vector<float> SURF::descriptors_gold;
+
+TEST_P(SURF, EmptyDataTest)
 {
-    SURF_GPU fdetector;
+    PRINT_PARAM(devInfo);
 
-    GpuMat image;
-    vector<KeyPoint> keypoints;
-    vector<float> descriptors;
-    try
-    {
-        fdetector(image, GpuMat(), keypoints, descriptors);
-    }
-    catch(...)
+    cv::gpu::SURF_GPU fdetector;
+
+    cv::gpu::GpuMat image;
+    std::vector<cv::KeyPoint> keypoints;
+    std::vector<float> descriptors;
+
+    ASSERT_NO_THROW(
+        fdetector(image, cv::gpu::GpuMat(), keypoints, descriptors);
+    );
+
+    EXPECT_TRUE(keypoints.empty());
+    EXPECT_TRUE(descriptors.empty());
+}
+
+TEST_P(SURF, Accuracy)
+{
+    ASSERT_TRUE(!image.empty());
+    ASSERT_TRUE(!keypoints_gold.empty() && !descriptors_gold.empty()); // &descriptors_gold[0] below requires a non-empty gold set
+    PRINT_PARAM(devInfo);
+
+    // Compute keypoints.
+    std::vector<cv::KeyPoint> keypoints;
+    cv::Mat descriptors;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_descriptors;
+        cv::gpu::SURF_GPU fdetector; fdetector.extended = false;
+
+        fdetector(cv::gpu::GpuMat(image), cv::gpu::GpuMat(mask), keypoints, dev_descriptors);
+
+        dev_descriptors.download(descriptors);
+    );
+
+    cv::BruteForceMatcher< cv::L2<float> > matcher;
+    std::vector<cv::DMatch> matches;
+
+    matcher.match(cv::Mat(keypoints_gold.size(), 64, CV_32FC1, &descriptors_gold[0]), descriptors, matches);
+
+    int validCount = 0;
+
+    for (size_t i = 0; i < matches.size(); ++i)
     {
-        ts->printf( cvtest::TS::LOG, "detect() on empty image must not generate exception (1).\n" );
-        ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
+        const cv::DMatch& m = matches[i];
+
+        const cv::KeyPoint& p1 = keypoints_gold[m.queryIdx];
+        const cv::KeyPoint& p2 = keypoints[m.trainIdx];
+
+        const float maxPtDif = 1.f;
+        const float maxSizeDif = 1.f;
+        const float maxAngleDif = 2.f;
+        const float maxResponseDif = 0.1f;
+
+        float dist = (float)cv::norm(p1.pt - p2.pt);
+        if (dist < maxPtDif &&
+                fabs(p1.size - p2.size) < maxSizeDif &&
+                fabs(p1.angle - p2.angle) < maxAngleDif &&       // fabs, not abs: unqualified abs may bind to ::abs(int) and truncate
+                fabs(p1.response - p2.response) < maxResponseDif &&
+                p1.octave == p2.octave &&
+                p1.class_id == p2.class_id )
+        {
+            ++validCount;
+        }
     }
 
-    if( !keypoints.empty() )
+    double validRatio = (double)validCount / matches.size();
+
+    EXPECT_GT(validRatio, 0.5); // more than half of the gold-to-GPU matches must be similar keypoints
+}
+
+INSTANTIATE_TEST_CASE_P(Features2D, SURF, testing::ValuesIn(devices(cv::gpu::GLOBAL_ATOMICS)));
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// BruteForceMatcher
+
+static const char* dists[] = {"L1Dist", "L2Dist", "HammingDist"};
+
+struct BruteForceMatcher : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, cv::gpu::BruteForceMatcher_GPU_base::DistType, int> >
+{
+    static const int queryDescCount = 300; // must be even: several tests split the train data into two halves
+    static const int countFactor = 4; // do not change it
+
+    cv::gpu::DeviceInfo devInfo;
+    cv::gpu::BruteForceMatcher_GPU_base::DistType distType;
+    int dim;
+
+    cv::Mat query, train;
+
+    virtual void SetUp() // reads (device, distance type, descriptor dimension) from the test parameter and builds query/train
     {
-        ts->printf( cvtest::TS::LOG, "detect() on empty image must return empty keypoints vector (1).\n" );
-        ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-        return;
+        devInfo = std::tr1::get<0>(GetParam());
+        distType = std::tr1::get<1>(GetParam());
+        dim = std::tr1::get<2>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        cv::Mat queryBuf, trainBuf;
+
+        // Generate query descriptors randomly.
+        // Descriptor vector elements are integer values.
+        queryBuf.create(queryDescCount, dim, CV_32SC1);
+        rng.fill(queryBuf, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
+        queryBuf.convertTo(queryBuf, CV_32FC1);
+
+        // Generate train descriptors as follows:
+        // copy each query descriptor to the train set countFactor times
+        // and perturb one randomly chosen element of each copy, in
+        // ascending order: copy c is offset by a value drawn between
+        // step * c and step * (c + 1), so all offsets stay within (0.f, 1.f).
+        trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1);
+        float step = 1.f / countFactor;
+        for (int qIdx = 0; qIdx < queryDescCount; qIdx++)
+        {
+            cv::Mat queryDescriptor = queryBuf.row(qIdx);
+            for (int c = 0; c < countFactor; c++)
+            {
+                int tIdx = qIdx * countFactor + c;
+                cv::Mat trainDescriptor = trainBuf.row(tIdx);
+                queryDescriptor.copyTo(trainDescriptor);
+                int elem = rng(dim);
+                float diff = rng.uniform(step * c, step * (c + 1));
+                trainDescriptor.at<float>(0, elem) += diff;
+            }
+        }
+
+        queryBuf.convertTo(query, CV_32F);
+        trainBuf.convertTo(train, CV_32F);
     }
+};
+const int BruteForceMatcher::queryDescCount; const int BruteForceMatcher::countFactor; // definitions: ASSERT_EQ binds its arguments by const reference (odr-use)
+TEST_P(BruteForceMatcher, Match)
+{
+    const char* distStr = dists[distType];
+
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(distStr);
+    PRINT_PARAM(dim);
 
-    if( !descriptors.empty() )
+    std::vector<cv::DMatch> matches;
+
+    ASSERT_NO_THROW(
+        cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+
+        matcher.match(cv::gpu::GpuMat(query), cv::gpu::GpuMat(train), matches);
+    );
+
+    ASSERT_EQ(queryDescCount, matches.size());
+
+    int badCount = 0;
+    for (size_t i = 0; i < matches.size(); i++)
     {
-        ts->printf( cvtest::TS::LOG, "detect() on empty image must return empty descriptors vector (1).\n" );
-        ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_OUTPUT );
-        return;
+        cv::DMatch match = matches[i];
+        if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
+            badCount++;
     }
+
+    ASSERT_EQ(0, badCount);
 }
 
-bool CV_GPU_SURFTest::isSimilarKeypoints(const KeyPoint& p1, const KeyPoint& p2)
+TEST_P(BruteForceMatcher, MatchAdd)
 {
-    const float maxPtDif = 1.f;
-    const float maxSizeDif = 1.f;
-    const float maxAngleDif = 2.f;
-    const float maxResponseDif = 0.1f;
-
-    float dist = (float)norm( p1.pt - p2.pt );
-    return (dist < maxPtDif &&
-            fabs(p1.size - p2.size) < maxSizeDif &&
-            abs(p1.angle - p2.angle) < maxAngleDif &&
-            abs(p1.response - p2.response) < maxResponseDif &&
-            p1.octave == p2.octave &&
-            p1.class_id == p2.class_id );
+    const char* distStr = dists[distType];
+
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(distStr);
+    PRINT_PARAM(dim);
+
+    std::vector<cv::DMatch> matches;
+
+    bool isMaskSupported;
+
+    ASSERT_NO_THROW(
+        cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+
+        cv::gpu::GpuMat d_train(train);
+
+        // call add() twice so the train set is stored as two separate images (imgIdx 0 and 1)
+        matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(0, train.rows/2)));
+        matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(train.rows/2, train.rows)));
+
+        // prepare masks (make first nearest match illegal)
+        std::vector<cv::gpu::GpuMat> masks(2);
+        for (int mi = 0; mi < 2; mi++)
+        {
+            masks[mi] = cv::gpu::GpuMat(query.rows, train.rows/2, CV_8UC1, cv::Scalar::all(1));
+            for (int di = 0; di < queryDescCount/2; di++)
+                masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
+        }
+
+        matcher.match(cv::gpu::GpuMat(query), matches, masks);
+
+        isMaskSupported = matcher.isMaskSupported();
+    );
+
+    ASSERT_EQ(queryDescCount, matches.size());
+
+    int badCount = 0;
+    for (size_t i = 0; i < matches.size(); i++)
+    {
+        cv::DMatch match = matches[i];
+        int shift = isMaskSupported ? 1 : 0;
+        {
+            if (i < queryDescCount / 2)
+            {
+                if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + shift) || (match.imgIdx != 0))
+                    badCount++;
+            }
+            else
+            {
+                if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + shift) || (match.imgIdx != 1))
+                    badCount++;
+            }
+        }
+    }
+
+    ASSERT_EQ(0, badCount);
 }
 
-int CV_GPU_SURFTest::getValidCount(const vector<KeyPoint>& keypoints1, const vector<KeyPoint>& keypoints2,
-                     const vector<DMatch>& matches)
+TEST_P(BruteForceMatcher, KnnMatch)
 {
-    int count = 0;
+    const char* distStr = dists[distType];
 
-    for (size_t i = 0; i < matches.size(); ++i)
-    {
-        const DMatch& m = matches[i];
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(distStr);
+    PRINT_PARAM(dim);
+
+    const int knn = 3;
+
+    std::vector< std::vector<cv::DMatch> > matches;
 
-        const KeyPoint& kp1 = keypoints1[m.queryIdx];
-        const KeyPoint& kp2 = keypoints2[m.trainIdx];
+    ASSERT_NO_THROW(
+        cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+        matcher.knnMatch(cv::gpu::GpuMat(query), cv::gpu::GpuMat(train), matches, knn);
+    );
 
-        if (isSimilarKeypoints(kp1, kp2))
-            ++count;
+    ASSERT_EQ(queryDescCount, matches.size());
+
+    int badCount = 0;
+    for (size_t i = 0; i < matches.size(); i++)
+    {
+        if ((int)matches[i].size() != knn)
+            badCount++;
+        else
+        {
+            int localBadCount = 0;
+            for (int k = 0; k < knn; k++)
+            {
+                cv::DMatch match = matches[i][k];
+                if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0))
+                    localBadCount++;
+            }
+            badCount += localBadCount > 0 ? 1 : 0;
+        }
     }
 
-    return count;
+    ASSERT_EQ(0, badCount);
 }
 
-void CV_GPU_SURFTest::compareKeypointSets(const vector<KeyPoint>& validKeypoints, const vector<KeyPoint>& calcKeypoints, 
-                                          const Mat& validDescriptors, const Mat& calcDescriptors)
+TEST_P(BruteForceMatcher, KnnMatchAdd)
 {
-    BruteForceMatcher< L2<float> > matcher;
-    vector<DMatch> matches;
+    const char* distStr = dists[distType];
+
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(distStr);
+    PRINT_PARAM(dim);
 
-    matcher.match(validDescriptors, calcDescriptors, matches);
+    const int knn = 2;
+    std::vector< std::vector<cv::DMatch> > matches;
 
-    int validCount = getValidCount(validKeypoints, calcKeypoints, matches);
-    float validRatio = (float)validCount / matches.size();
+    bool isMaskSupported;
 
-    if (validRatio < 0.5f)
+    ASSERT_NO_THROW(
+        cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+
+        cv::gpu::GpuMat d_train(train);
+
+        // call add() twice so the train set is stored as two separate images (imgIdx 0 and 1)
+        matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+        matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+
+        // prepare masks (make first nearest match illegal)
+        std::vector<cv::gpu::GpuMat> masks(2);
+        for (int mi = 0; mi < 2; mi++ )
+        {
+            masks[mi] = cv::gpu::GpuMat(query.rows, train.rows / 2, CV_8UC1, cv::Scalar::all(1));
+            for (int di = 0; di < queryDescCount / 2; di++)
+                masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
+        }
+
+        matcher.knnMatch(cv::gpu::GpuMat(query), matches, knn, masks);
+
+        isMaskSupported = matcher.isMaskSupported();
+    );
+
+    ASSERT_EQ(queryDescCount, matches.size());
+
+    int badCount = 0;
+    int shift = isMaskSupported ? 1 : 0;
+    for (size_t i = 0; i < matches.size(); i++)
     {
-        ts->printf(cvtest::TS::LOG, "Bad accuracy - %f.\n", validRatio);
-        ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
-        return;
+        if ((int)matches[i].size() != knn)
+            badCount++;
+        else
+        {
+            int localBadCount = 0;
+            for (int k = 0; k < knn; k++)
+            {
+                cv::DMatch match = matches[i][k];
+                {
+                    if (i < queryDescCount / 2)
+                    {
+                        if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) )
+                            localBadCount++;
+                    }
+                    else
+                    {
+                        if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) )
+                            localBadCount++;
+                    }
+                }
+            }
+            badCount += localBadCount > 0 ? 1 : 0;
+        }
     }
+
+    ASSERT_EQ(0, badCount);
 }
 
-void CV_GPU_SURFTest::accuracyTest()
+TEST_P(BruteForceMatcher, RadiusMatch)
 {
-    string imgFilename = string(ts->get_data_path()) + FEATURES2D_DIR + "/" + IMAGE_FILENAME;
+    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+        return;
+
+    const char* distStr = dists[distType];
+
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(distStr);
+    PRINT_PARAM(dim);
+
+    const float radius = 1.f / countFactor;
+
+    std::vector< std::vector<cv::DMatch> > matches;
 
-    // Read the test image.
-    Mat image = imread(imgFilename, 0);
-    if (image.empty())
+    ASSERT_NO_THROW(
+        cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+
+        matcher.radiusMatch(cv::gpu::GpuMat(query), cv::gpu::GpuMat(train), matches, radius);
+    );
+
+    ASSERT_EQ(queryDescCount, matches.size());
+
+    int badCount = 0;
+    for (size_t i = 0; i < matches.size(); i++)
     {
-        ts->printf( cvtest::TS::LOG, "Image %s can not be read.\n", imgFilename.c_str() );
-        ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_TEST_DATA );
-        return;
+        if ((int)matches[i].size() != 1)
+            badCount++;
+        else
+        {
+            cv::DMatch match = matches[i][0];
+            if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0))
+                badCount++;
+        }
     }
-    
-    Mat mask(image.size(), CV_8UC1, Scalar::all(1));
-    mask(Range(0, image.rows / 2), Range(0, image.cols / 2)).setTo(Scalar::all(0));
 
-    // Compute keypoints.
-    vector<KeyPoint> calcKeypoints;
-    GpuMat calcDescriptors;
-    SURF_GPU fdetector; fdetector.extended = false;
-    fdetector(GpuMat(image), GpuMat(mask), calcKeypoints, calcDescriptors);
-
-    // Calc validation keypoints set.
-    vector<KeyPoint> validKeypoints;
-    vector<float> validDescriptors;
-    SURF fdetector_gold; fdetector_gold.extended = false;
-    fdetector_gold(image, mask, validKeypoints, validDescriptors);
-
-    compareKeypointSets(validKeypoints, calcKeypoints, 
-        Mat(validKeypoints.size(), fdetector_gold.descriptorSize(), CV_32F, &validDescriptors[0]), calcDescriptors);
+    ASSERT_EQ(0, badCount);
 }
 
-void CV_GPU_SURFTest::run( int /*start_from*/ )
+TEST_P(BruteForceMatcher, RadiusMatchAdd)
 {
-    emptyDataTest();
-    accuracyTest();
+    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+        return;
+
+    const char* distStr = dists[distType];
+
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(distStr);
+    PRINT_PARAM(dim);
+
+    int n = 3;
+    const float radius = 1.f / countFactor * n;
+
+    std::vector< std::vector<cv::DMatch> > matches;
+
+    bool isMaskSupported;
+
+    ASSERT_NO_THROW(
+        cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
+
+        cv::gpu::GpuMat d_train(train);
+
+        // call add() twice so the train set is stored as two separate images (imgIdx 0 and 1)
+        matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+        matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+
+        // prepare masks (make first nearest match illegal)
+        std::vector<cv::gpu::GpuMat> masks(2);
+        for (int mi = 0; mi < 2; mi++)
+        {
+            masks[mi] = cv::gpu::GpuMat(query.rows, train.rows / 2, CV_8UC1, cv::Scalar::all(1));
+            for (int di = 0; di < queryDescCount / 2; di++)
+                masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
+        }
+
+        matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks);
+
+        isMaskSupported = matcher.isMaskSupported();
+    );
+
+    ASSERT_EQ(queryDescCount, matches.size());
+
+    int badCount = 0;
+    int shift = isMaskSupported ? 1 : 0;
+    int needMatchCount = isMaskSupported ? n-1 : n;
+    for (size_t i = 0; i < matches.size(); i++)
+    {
+        if ((int)matches[i].size() != needMatchCount)
+            badCount++;
+        else
+        {
+            int localBadCount = 0;
+            for (int k = 0; k < needMatchCount; k++)
+            {
+                cv::DMatch match = matches[i][k];
+                {
+                    if (i < queryDescCount / 2)
+                    {
+                        if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) )
+                            localBadCount++;
+                    }
+                    else
+                    {
+                        if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) )
+                            localBadCount++;
+                    }
+                }
+            }
+            badCount += localBadCount > 0 ? 1 : 0;
+        }
+    }
+
+    ASSERT_EQ(0, badCount);
 }
 
-TEST(SURF, empty_data_and_accuracy) { CV_GPU_SURFTest test; test.safe_run(); }
+INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(cv::gpu::BruteForceMatcher_GPU_base::L1Dist, cv::gpu::BruteForceMatcher_GPU_base::L2Dist),
+                        testing::Values(57, 64, 83, 128, 179, 256, 304)));
+
+#endif // HAVE_CUDA
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+//struct CV_GpuBFMTest : CV_GpuTestBase
+//{
+//    void run_gpu_test();
+//       
+//    void generateData(GpuMat& query, GpuMat& train, int dim, int depth);
+//
+//    virtual void test(const GpuMat& query, const GpuMat& train, BruteForceMatcher_GPU_base& matcher) = 0;
+//
+//    static const int queryDescCount = 300; // must be even number because we split train data in some cases in two
+//    static const int countFactor = 4; // do not change it
+//};
+//
+//void CV_GpuBFMTest::run_gpu_test()
+//{
+//    BruteForceMatcher_GPU_base::DistType dists[] = {BruteForceMatcher_GPU_base::L1Dist, BruteForceMatcher_GPU_base::L2Dist, BruteForceMatcher_GPU_base::HammingDist};
+//    const char* dists_str[] = {"L1Dist", "L2Dist", "HammingDist"};
+//    int dists_count = sizeof(dists) / sizeof(dists[0]);
+//
+//    RNG rng = ts->get_rng();
+//
+//    int dims[] = {rng.uniform(30, 60), 64, rng.uniform(70, 110), 128, rng.uniform(130, 250), 256, rng.uniform(260, 350)};
+//    int dims_count = sizeof(dims) / sizeof(dims[0]);
+//
+//    for (int dist = 0; dist < dists_count; ++dist)
+//    {
+//        int depth_end = dists[dist] == BruteForceMatcher_GPU_base::HammingDist ? CV_32S : CV_32F;
+//
+//        for (int depth = CV_8U; depth <= depth_end; ++depth)
+//        {
+//            for (int dim = 0; dim < dims_count; ++dim)
+//            {
+//                PRINT_ARGS("dist=%s depth=%s dim=%d", dists_str[dist], getTypeName(depth), dims[dim]);
+//                
+//                BruteForceMatcher_GPU_base matcher(dists[dist]);
+//
+//                GpuMat query, train;
+//                generateData(query, train, dim, depth);
+//
+//                test(query, train, matcher);
+//            }
+//        }
+//    }
+//}
+//
+//void CV_GpuBFMTest::generateData(GpuMat& queryGPU, GpuMat& trainGPU, int dim, int depth)
+//{
+//    RNG& rng = ts->get_rng();
+//
+//    Mat queryBuf, trainBuf;
+//
+//    // Generate query descriptors randomly.
+//    // Descriptor vector elements are integer values.
+//    queryBuf.create(queryDescCount, dim, CV_32SC1);
+//    rng.fill(queryBuf, RNG::UNIFORM, Scalar::all(0), Scalar(3));
+//    queryBuf.convertTo(queryBuf, CV_32FC1);
+//
+//    // Generate train descriptors as follows:
+//    // copy each query descriptor to train set countFactor times
+//    // and perturb one randomly chosen element of each copied descriptor
+//    // in ascending order. General boundaries of the perturbation
+//    // are (0.f, 1.f).
+//    trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1);
+//    float step = 1.f / countFactor;
+//    for (int qIdx = 0; qIdx < queryDescCount; qIdx++)
+//    {
+//        Mat queryDescriptor = queryBuf.row(qIdx);
+//        for (int c = 0; c < countFactor; c++)
+//        {
+//            int tIdx = qIdx * countFactor + c;
+//            Mat trainDescriptor = trainBuf.row(tIdx);
+//            queryDescriptor.copyTo(trainDescriptor);
+//            int elem = rng(dim);
+//            float diff = rng.uniform(step * c, step * (c + 1));
+//            trainDescriptor.at<float>(0, elem) += diff;
+//        }
+//    }
+//
+//    Mat query, train;
+//    queryBuf.convertTo(query, depth);
+//    trainBuf.convertTo(train, depth);
+//
+//    queryGPU.upload(query);
+//    trainGPU.upload(train);
+//}
+//
+//#define GPU_BFM_TEST(test_name) \
+//    struct CV_GpuBFM_ ##test_name ## _Test : CV_GpuBFMTest \
+//    { \
+//        void test(const GpuMat& query, const GpuMat& train, BruteForceMatcher_GPU_base& matcher); \
+//    }; \
+//    TEST(BruteForceMatcher, test_name) { CV_GpuBFM_ ##test_name ## _Test test; test.safe_run(); } \
+//    void CV_GpuBFM_ ##test_name ## _Test::test(const GpuMat& query, const GpuMat& train, BruteForceMatcher_GPU_base& matcher)
+//
+/////////////////////////////////////////////////////////////////////////////////////////////////////////
+//// match
+//
+//GPU_BFM_TEST(match)
+//{
+//    vector<DMatch> matches;
+//
+//    matcher.match(query, train, matches);
+//
+//    CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT);
+//
+//    int badCount = 0;
+//    for (size_t i = 0; i < matches.size(); i++)
+//    {
+//        DMatch match = matches[i];
+//        if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
+//            badCount++;
+//    }
+//
+//    CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
+//}
+//
+//GPU_BFM_TEST(match_add)
+//{
+//    vector<DMatch> matches;
+//
+//    // make add() twice to test such case
+//    matcher.add(vector<GpuMat>(1, train.rowRange(0, train.rows/2)));
+//    matcher.add(vector<GpuMat>(1, train.rowRange(train.rows/2, train.rows)));
+//
+//    // prepare masks (make first nearest match illegal)
+//    vector<GpuMat> masks(2);
+//    for (int mi = 0; mi < 2; mi++)
+//    {
+//        masks[mi] = GpuMat(query.rows, train.rows/2, CV_8UC1, Scalar::all(1));
+//        for (int di = 0; di < queryDescCount/2; di++)
+//            masks[mi].col(di * countFactor).setTo(Scalar::all(0));
+//    }
+//
+//    matcher.match(query, matches, masks);
+//
+//    CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT);
+//
+//    int badCount = 0;
+//    for (size_t i = 0; i < matches.size(); i++)
+//    {
+//        DMatch match = matches[i];
+//        int shift = matcher.isMaskSupported() ? 1 : 0;
+//        {
+//            if (i < queryDescCount / 2)
+//            {
+//                if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + shift) || (match.imgIdx != 0))
+//                    badCount++;
+//            }
+//            else
+//            {
+//                if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + shift) || (match.imgIdx != 1))
+//                    badCount++;
+//            }
+//        }
+//    }
+//
+//    CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
+//}
+//
+/////////////////////////////////////////////////////////////////////////////////////////////////////////
+//// knnMatch
+//
+//GPU_BFM_TEST(knnMatch)
+//{
+//    const int knn = 3;
+//
+//    vector< vector<DMatch> > matches;
+//
+//    matcher.knnMatch(query, train, matches, knn);
+//
+//    CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT);
+//
+//    int badCount = 0;
+//    for (size_t i = 0; i < matches.size(); i++)
+//    {
+//        if ((int)matches[i].size() != knn)
+//            badCount++;
+//        else
+//        {
+//            int localBadCount = 0;
+//            for (int k = 0; k < knn; k++)
+//            {
+//                DMatch match = matches[i][k];
+//                if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0))
+//                    localBadCount++;
+//            }
+//            badCount += localBadCount > 0 ? 1 : 0;
+//        }
+//    }
+//
+//    CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
+//}
+//
+//GPU_BFM_TEST(knnMatch_add)
+//{
+//    const int knn = 2;
+//    vector<vector<DMatch> > matches;
+//
+//    // make add() twice to test such case
+//    matcher.add(vector<GpuMat>(1,train.rowRange(0, train.rows / 2)));
+//    matcher.add(vector<GpuMat>(1,train.rowRange(train.rows / 2, train.rows)));
+//
+//    // prepare masks (make first nearest match illegal)
+//    vector<GpuMat> masks(2);
+//    for (int mi = 0; mi < 2; mi++ )
+//    {
+//        masks[mi] = GpuMat(query.rows, train.rows / 2, CV_8UC1, Scalar::all(1));
+//        for (int di = 0; di < queryDescCount / 2; di++)
+//            masks[mi].col(di * countFactor).setTo(Scalar::all(0));
+//    }
+//
+//    matcher.knnMatch(query, matches, knn, masks);
+//
+//    CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT);
+//
+//    int badCount = 0;
+//    int shift = matcher.isMaskSupported() ? 1 : 0;
+//    for (size_t i = 0; i < matches.size(); i++)
+//    {
+//        if ((int)matches[i].size() != knn)
+//            badCount++;
+//        else
+//        {
+//            int localBadCount = 0;
+//            for (int k = 0; k < knn; k++)
+//            {
+//                DMatch match = matches[i][k];
+//                {
+//                    if (i < queryDescCount / 2)
+//                    {
+//                        if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) )
+//                            localBadCount++;
+//                    }
+//                    else
+//                    {
+//                        if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) )
+//                            localBadCount++;
+//                    }
+//                }
+//            }
+//            badCount += localBadCount > 0 ? 1 : 0;
+//        }
+//    }
+//
+//    CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
+//}
+//
+/////////////////////////////////////////////////////////////////////////////////////////////////////////
+//// radiusMatch
+//
+//GPU_BFM_TEST(radiusMatch)
+//{
+//    CHECK_RETURN(support(GLOBAL_ATOMICS), TS::SKIPPED);
+//
+//    const float radius = 1.f / countFactor;
+//
+//    vector< vector<DMatch> > matches;
+//
+//    matcher.radiusMatch(query, train, matches, radius);
+//
+//    CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT);
+//
+//    int badCount = 0;
+//    for (size_t i = 0; i < matches.size(); i++)
+//    {
+//        if ((int)matches[i].size() != 1)
+//            badCount++;
+//        else
+//        {
+//            DMatch match = matches[i][0];
+//            if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0))
+//                badCount++;
+//        }
+//    }
+//
+//    CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
+//}
+//
+//GPU_BFM_TEST(radiusMatch_add)
+//{
+//    CHECK_RETURN(support(GLOBAL_ATOMICS), TS::SKIPPED);
+//
+//    int n = 3;
+//    const float radius = 1.f / countFactor * n;
+//    vector< vector<DMatch> > matches;
+//
+//    // make add() twice to test such case
+//    matcher.add(vector<GpuMat>(1,train.rowRange(0, train.rows / 2)));
+//    matcher.add(vector<GpuMat>(1,train.rowRange(train.rows / 2, train.rows)));
+//
+//    // prepare masks (make first nearest match illegal)
+//    vector<GpuMat> masks(2);
+//    for (int mi = 0; mi < 2; mi++)
+//    {
+//        masks[mi] = GpuMat(query.rows, train.rows / 2, CV_8UC1, Scalar::all(1));
+//        for (int di = 0; di < queryDescCount / 2; di++)
+//            masks[mi].col(di * countFactor).setTo(Scalar::all(0));
+//    }
+//
+//    matcher.radiusMatch(query, matches, radius, masks);
+//
+//    CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT);
+//
+//    int badCount = 0;
+//    int shift = matcher.isMaskSupported() ? 1 : 0;
+//    int needMatchCount = matcher.isMaskSupported() ? n-1 : n;
+//    for (size_t i = 0; i < matches.size(); i++)
+//    {
+//        if ((int)matches[i].size() != needMatchCount)
+//            badCount++;
+//        else
+//        {
+//            int localBadCount = 0;
+//            for (int k = 0; k < needMatchCount; k++)
+//            {
+//                DMatch match = matches[i][k];
+//                {
+//                    if (i < queryDescCount / 2)
+//                    {
+//                        if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) )
+//                            localBadCount++;
+//                    }
+//                    else
+//                    {
+//                        if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) )
+//                            localBadCount++;
+//                    }
+//                }
+//            }
+//            badCount += localBadCount > 0 ? 1 : 0;
+//        }
+//    }
+//
+//    CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
+//}
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+////struct CV_GpuBruteForceMatcherTest : CV_GpuTestBase
+////{
+////    void run_gpu_test();
+////    
+////    void emptyDataTest();
+////    void dataTest(int dim);
+////    
+////    void generateData(GpuMat& query, GpuMat& train, int dim);
+////
+////    void matchTest(const GpuMat& query, const GpuMat& train);
+////    void knnMatchTest(const GpuMat& query, const GpuMat& train);
+////    void radiusMatchTest(const GpuMat& query, const GpuMat& train);
+////
+////    BruteForceMatcher_GPU< L2<float> > dmatcher;
+////
+////    static const int queryDescCount = 300; // must be an even number because the train data is split in two in some cases
+////    static const int countFactor = 4; // do not change it
+////};
+////
+////void CV_GpuBruteForceMatcherTest::emptyDataTest()
+////{
+////    GpuMat queryDescriptors, trainDescriptors, mask;
+////    vector<GpuMat> trainDescriptorCollection, masks;
+////    vector<DMatch> matches;
+////    vector< vector<DMatch> > vmatches;
+////
+////    try
+////    {
+////        dmatcher.match(queryDescriptors, trainDescriptors, matches, mask);
+////    }
+////    catch(...)
+////    {
+////        PRINTLN("match() on empty descriptors must not generate exception (1)");
+////        ts->set_failed_test_info(TS::FAIL_EXCEPTION);
+////    }
+////
+////    try
+////    {
+////        dmatcher.knnMatch(queryDescriptors, trainDescriptors, vmatches, 2, mask);
+////    }
+////    catch(...)
+////    {
+////        PRINTLN("knnMatch() on empty descriptors must not generate exception (1)");
+////        ts->set_failed_test_info(TS::FAIL_EXCEPTION);
+////    }
+////
+////    try
+////    {
+////        dmatcher.radiusMatch(queryDescriptors, trainDescriptors, vmatches, 10.f, mask);
+////    }
+////    catch(...)
+////    {
+////        PRINTLN("radiusMatch() on empty descriptors must not generate exception (1)");
+////        ts->set_failed_test_info(TS::FAIL_EXCEPTION);
+////    }
+////
+////    try
+////    {
+////        dmatcher.add(trainDescriptorCollection);
+////    }
+////    catch(...)
+////    {
+////        PRINTLN("add() on empty descriptors must not generate exception");
+////        ts->set_failed_test_info(TS::FAIL_EXCEPTION);
+////    }
+////
+////    try
+////    {
+////        dmatcher.match(queryDescriptors, matches, masks);
+////    }
+////    catch(...)
+////    {
+////        PRINTLN("match() on empty descriptors must not generate exception (2)");
+////        ts->set_failed_test_info(TS::FAIL_EXCEPTION);
+////    }
+////
+////    try
+////    {
+////        dmatcher.knnMatch(queryDescriptors, vmatches, 2, masks);
+////    }
+////    catch(...)
+////    {
+////        PRINTLN("knnMatch() on empty descriptors must not generate exception (2)");
+////        ts->set_failed_test_info(TS::FAIL_EXCEPTION);
+////    }
+////
+////    try
+////    {
+////        dmatcher.radiusMatch( queryDescriptors, vmatches, 10.f, masks );
+////    }
+////    catch(...)
+////    {
+////        PRINTLN("radiusMatch() on empty descriptors must not generate exception (2)");
+////        ts->set_failed_test_info(TS::FAIL_EXCEPTION);
+////    }
+////
+////}
+////
+////void CV_GpuBruteForceMatcherTest::generateData(GpuMat& queryGPU, GpuMat& trainGPU, int dim)
+////{
+////    Mat query, train;
+////    RNG& rng = ts->get_rng();
+////
+////    // Generate query descriptors randomly.
+////    // Descriptor vector elements are integer values.
+////    Mat buf(queryDescCount, dim, CV_32SC1);
+////    rng.fill(buf, RNG::UNIFORM, Scalar::all(0), Scalar(3));
+////    buf.convertTo(query, CV_32FC1);
+////
+////    // Generate train descriptors as follows:
+////    // copy each query descriptor to the train set countFactor times
+////    // and perturb one randomly chosen element of each copy, with the
+////    // perturbation growing in ascending order across the copies. The
+////    // overall bounds of the perturbation are (0.f, 1.f).
+////    train.create( query.rows*countFactor, query.cols, CV_32FC1 );
+////    float step = 1.f / countFactor;
+////    for (int qIdx = 0; qIdx < query.rows; qIdx++)
+////    {
+////        Mat queryDescriptor = query.row(qIdx);
+////        for (int c = 0; c < countFactor; c++)
+////        {
+////            int tIdx = qIdx * countFactor + c;
+////            Mat trainDescriptor = train.row(tIdx);
+////            queryDescriptor.copyTo(trainDescriptor);
+////            int elem = rng(dim);
+////            float diff = rng.uniform(step * c, step * (c + 1));
+////            trainDescriptor.at<float>(0, elem) += diff;
+////        }
+////    }
+////
+////    queryGPU.upload(query);
+////    trainGPU.upload(train);
+////}
+////
+////void CV_GpuBruteForceMatcherTest::matchTest(const GpuMat& query, const GpuMat& train)
+////{
+////    dmatcher.clear();
+////
+////    // test const version of match()
+////    {
+////        vector<DMatch> matches;
+////        dmatcher.match(query, train, matches);
+////
+////        CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT);
+////
+////        int badCount = 0;
+////        for (size_t i = 0; i < matches.size(); i++)
+////        {
+////            DMatch match = matches[i];
+////            if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
+////                badCount++;
+////        }
+////
+////        CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
+////    }
+////
+////    // test version of match() with add()
+////    {
+////        vector<DMatch> matches;
+////
+////        // call add() twice so that a train collection built in two steps is tested
+////        dmatcher.add(vector<GpuMat>(1, train.rowRange(0, train.rows/2)));
+////        dmatcher.add(vector<GpuMat>(1, train.rowRange(train.rows/2, train.rows)));
+////
+////        // prepare masks (make first nearest match illegal)
+////        vector<GpuMat> masks(2);
+////        for (int mi = 0; mi < 2; mi++)
+////        {
+////            masks[mi] = GpuMat(query.rows, train.rows/2, CV_8UC1, Scalar::all(1));
+////            for (int di = 0; di < queryDescCount/2; di++)
+////                masks[mi].col(di * countFactor).setTo(Scalar::all(0));
+////        }
+////
+////        dmatcher.match(query, matches, masks);
+////
+////        CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT);
+////
+////        int badCount = 0;
+////        for (size_t i = 0; i < matches.size(); i++)
+////        {
+////            DMatch match = matches[i];
+////            int shift = dmatcher.isMaskSupported() ? 1 : 0;
+////            {
+////                if (i < queryDescCount / 2)
+////                {
+////                    if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + shift) || (match.imgIdx != 0))
+////                        badCount++;
+////                }
+////                else
+////                {
+////                    if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + shift) || (match.imgIdx != 1))
+////                        badCount++;
+////                }
+////            }
+////        }
+////
+////        CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
+////    }
+////}
+////
+////void CV_GpuBruteForceMatcherTest::knnMatchTest(const GpuMat& query, const GpuMat& train)
+////{
+////    dmatcher.clear();
+////
+////    // test const version of knnMatch()
+////    {
+////        const int knn = 3;
+////
+////        vector< vector<DMatch> > matches;
+////        dmatcher.knnMatch(query, train, matches, knn);
+////
+////        CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT);
+////
+////        int badCount = 0;
+////        for (size_t i = 0; i < matches.size(); i++)
+////        {
+////            if ((int)matches[i].size() != knn)
+////                badCount++;
+////            else
+////            {
+////                int localBadCount = 0;
+////                for (int k = 0; k < knn; k++)
+////                {
+////                    DMatch match = matches[i][k];
+////                    if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0))
+////                        localBadCount++;
+////                }
+////                badCount += localBadCount > 0 ? 1 : 0;
+////            }
+////        }
+////
+////        CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
+////    }
+////
+////    // test version of knnMatch() with add()
+////    {
+////        const int knn = 2;
+////        vector<vector<DMatch> > matches;
+////
+////        // call add() twice so that a train collection built in two steps is tested
+////        dmatcher.add(vector<GpuMat>(1,train.rowRange(0, train.rows / 2)));
+////        dmatcher.add(vector<GpuMat>(1,train.rowRange(train.rows / 2, train.rows)));
+////
+////        // prepare masks (make first nearest match illegal)
+////        vector<GpuMat> masks(2);
+////        for (int mi = 0; mi < 2; mi++ )
+////        {
+////            masks[mi] = GpuMat(query.rows, train.rows / 2, CV_8UC1, Scalar::all(1));
+////            for (int di = 0; di < queryDescCount / 2; di++)
+////                masks[mi].col(di * countFactor).setTo(Scalar::all(0));
+////        }
+////
+////        dmatcher.knnMatch(query, matches, knn, masks);
+////
+////        CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT);
+////
+////        int badCount = 0;
+////        int shift = dmatcher.isMaskSupported() ? 1 : 0;
+////        for (size_t i = 0; i < matches.size(); i++)
+////        {
+////            if ((int)matches[i].size() != knn)
+////                badCount++;
+////            else
+////            {
+////                int localBadCount = 0;
+////                for (int k = 0; k < knn; k++)
+////                {
+////                    DMatch match = matches[i][k];
+////                    {
+////                        if (i < queryDescCount / 2)
+////                        {
+////                            if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) )
+////                                localBadCount++;
+////                        }
+////                        else
+////                        {
+////                            if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) )
+////                                localBadCount++;
+////                        }
+////                    }
+////                }
+////                badCount += localBadCount > 0 ? 1 : 0;
+////            }
+////        }
+////
+////        CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
+////    }
+////}
+////
+////void CV_GpuBruteForceMatcherTest::radiusMatchTest(const GpuMat& query, const GpuMat& train)
+////{
+////    CHECK_RETURN(support(GLOBAL_ATOMICS), TS::SKIPPED);
+////
+////    dmatcher.clear();
+////
+////    // test const version of radiusMatch()
+////    {
+////        const float radius = 1.f / countFactor;
+////
+////        vector< vector<DMatch> > matches;
+////        dmatcher.radiusMatch(query, train, matches, radius);
+////
+////        CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT);
+////
+////        int badCount = 0;
+////        for (size_t i = 0; i < matches.size(); i++)
+////        {
+////            if ((int)matches[i].size() != 1)
+////                badCount++;
+////            else
+////            {
+////                DMatch match = matches[i][0];
+////                if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0))
+////                    badCount++;
+////            }
+////        }
+////
+////        CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
+////    }
+////
+////    // test version of radiusMatch() with add()
+////    {
+////        int n = 3;
+////        const float radius = 1.f / countFactor * n;
+////        vector< vector<DMatch> > matches;
+////
+////        // call add() twice so that a train collection built in two steps is tested
+////        dmatcher.add(vector<GpuMat>(1,train.rowRange(0, train.rows / 2)));
+////        dmatcher.add(vector<GpuMat>(1,train.rowRange(train.rows / 2, train.rows)));
+////
+////        // prepare masks (make first nearest match illegal)
+////        vector<GpuMat> masks(2);
+////        for (int mi = 0; mi < 2; mi++)
+////        {
+////            masks[mi] = GpuMat(query.rows, train.rows / 2, CV_8UC1, Scalar::all(1));
+////            for (int di = 0; di < queryDescCount / 2; di++)
+////                masks[mi].col(di * countFactor).setTo(Scalar::all(0));
+////        }
+////
+////        dmatcher.radiusMatch(query, matches, radius, masks);
+////
+////        CHECK((int)matches.size() == queryDescCount, TS::FAIL_INVALID_OUTPUT);
+////
+////        int badCount = 0;
+////        int shift = dmatcher.isMaskSupported() ? 1 : 0;
+////        int needMatchCount = dmatcher.isMaskSupported() ? n-1 : n;
+////        for (size_t i = 0; i < matches.size(); i++)
+////        {
+////            if ((int)matches[i].size() != needMatchCount)
+////                badCount++;
+////            else
+////            {
+////                int localBadCount = 0;
+////                for (int k = 0; k < needMatchCount; k++)
+////                {
+////                    DMatch match = matches[i][k];
+////                    {
+////                        if (i < queryDescCount / 2)
+////                        {
+////                            if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) )
+////                                localBadCount++;
+////                        }
+////                        else
+////                        {
+////                            if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) )
+////                                localBadCount++;
+////                        }
+////                    }
+////                }
+////                badCount += localBadCount > 0 ? 1 : 0;
+////            }
+////        }
+////
+////        CHECK(badCount == 0, TS::FAIL_INVALID_OUTPUT);
+////    }
+////}
+////
+////void CV_GpuBruteForceMatcherTest::dataTest(int dim)
+////{
+////    GpuMat query, train;
+////    generateData(query, train, dim);
+////
+////    matchTest(query, train);
+////    knnMatchTest(query, train);
+////    radiusMatchTest(query, train);
+////
+////    dmatcher.clear();
+////}
+////
+////void CV_GpuBruteForceMatcherTest::run_gpu_test()
+////{
+////    emptyDataTest();
+////
+////    dataTest(50);
+////    dataTest(64);
+////    dataTest(100);
+////    dataTest(128);
+////    dataTest(200);
+////    dataTest(256);
+////    dataTest(300);
+////}
+////
+////TEST(BruteForceMatcher, accuracy) { CV_GpuBruteForceMatcherTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_filters.cpp b/modules/gpu/test/test_filters.cpp
index 91d1f14387..77846dde4c 100644
--- a/modules/gpu/test/test_filters.cpp
+++ b/modules/gpu/test/test_filters.cpp
@@ -39,329 +39,516 @@
 //
 //M*/
 
-#include <iostream>
-#include <cmath>
-#include <limits>
 #include "test_precomp.hpp"
 
-using namespace cv;
-using namespace std;
-using namespace gpu;
+#ifdef HAVE_CUDA
 
-class CV_GpuNppFilterTest : public cvtest::BaseTest
+struct FilterTest
 {
-public:
-    CV_GpuNppFilterTest(const char* /*test_name*/, const char* /*test_funcs*/) {}
-    virtual ~CV_GpuNppFilterTest() {}
+    static cv::Mat img_rgba;
+    static cv::Mat img_gray;
 
-protected:
-    void run(int);
-    virtual int test(const Mat& img) = 0;
-    
-    int test8UC1(const Mat& img)
+    static void SetUpTestCase() 
     {
-        cv::Mat img_C1;
-        cvtColor(img, img_C1, CV_BGR2GRAY);
-        return test(img_C1);
+        cv::Mat img = readImage("stereobp/aloe-L.png");
+        cv::cvtColor(img, img_rgba, CV_BGR2BGRA);
+        cv::cvtColor(img, img_gray, CV_BGR2GRAY);
     }
 
-    int test8UC4(const Mat& img)
+    static void TearDownTestCase() 
     {
-        cv::Mat img_C4;
-        cvtColor(img, img_C4, CV_BGR2BGRA);
-        return test(img_C4);
+        img_rgba.release();
+        img_gray.release();
     }
-        
-    int CheckNorm(const Mat& m1, const Mat& m2, const Size& ksize)
-    {
-        Rect roi = Rect(ksize.width, ksize.height, m1.cols - 2 * ksize.width, m1.rows - 2 * ksize.height);
-        Mat m1ROI = m1(roi);
-        Mat m2ROI = m2(roi);
+};
 
-        double res = norm(m1ROI, m2ROI, NORM_INF);
+cv::Mat FilterTest::img_rgba;
+cv::Mat FilterTest::img_gray;
 
-        // Max difference (2.0) in GaussianBlur
-        if (res <= 2)
-            return cvtest::TS::OK;
-        
-        ts->printf(cvtest::TS::LOG, "Norm: %f\n", res);
-        return cvtest::TS::FAIL_GENERIC;
+static double checkNorm(const cv::Mat& m1, const cv::Mat& m2, const cv::Size& ksize)
+{
+    cv::Rect roi(ksize.width, ksize.height, m1.cols - 2 * ksize.width, m1.rows - 2 * ksize.height);
+    cv::Mat m1ROI = m1(roi);
+    cv::Mat m2ROI = m2(roi);
+    return checkNorm(m1ROI, m2ROI);
+}
+
+static double checkNorm(const cv::Mat& m1, const cv::Mat& m2, int ksize)
+{
+    return checkNorm(m1, m2, cv::Size(ksize, ksize));
+}
+
+#define EXPECT_MAT_NEAR_KSIZE(mat1, mat2, ksize, eps) \
+    { \
+        ASSERT_EQ(mat1.type(), mat2.type()); \
+        ASSERT_EQ(mat1.size(), mat2.size()); \
+        EXPECT_LE(checkNorm(mat1, mat2, ksize), eps); \
     }
-};
 
-void CV_GpuNppFilterTest::run( int )
-{    
-    cv::Mat img = cv::imread(std::string(ts->get_data_path()) + "stereobp/aloe-L.png");
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// blur
+
+struct Blur : FilterTest, testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+{
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size ksize;
+
+    cv::Mat dst_gold_rgba;
+    cv::Mat dst_gold_gray;
+
+    using FilterTest::SetUpTestCase;
+    using FilterTest::TearDownTestCase;
     
-    if (img.empty())
+    virtual void SetUp()
     {
-        ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-        return;
+        devInfo = std::tr1::get<0>(GetParam());
+        ksize = cv::Size(std::tr1::get<1>(GetParam()), std::tr1::get<2>(GetParam()));
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::blur(img_rgba, dst_gold_rgba, ksize);
+        cv::blur(img_gray, dst_gold_gray, ksize);
     }
+};
+
+TEST_P(Blur, Accuracy)
+{
+    ASSERT_TRUE(!img_rgba.empty() && !img_gray.empty());
+
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(ksize);
 
-    //run tests
-    int testResult = cvtest::TS::OK;
+    cv::Mat dst_rgba;
+    cv::Mat dst_gray;
 
-    if (test8UC1(img) != cvtest::TS::OK)
-        testResult = cvtest::TS::FAIL_GENERIC;
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_rgba;
+        cv::gpu::GpuMat dev_dst_gray;
 
-    if (test8UC4(img) != cvtest::TS::OK)
-        testResult = cvtest::TS::FAIL_GENERIC;
+        cv::gpu::blur(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, ksize);
+        cv::gpu::blur(cv::gpu::GpuMat(img_gray), dev_dst_gray, ksize);
 
-    ts->set_failed_test_info(testResult);
+        dev_dst_rgba.download(dst_rgba);
+        dev_dst_gray.download(dst_gray);
+    );
 
-    ts->set_failed_test_info(cvtest::TS::OK);
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, ksize, 1.0);
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, ksize, 1.0);
 }
 
-////////////////////////////////////////////////////////////////////////////////
-// blur
-struct CV_GpuNppImageBlurTest : public CV_GpuNppFilterTest
+INSTANTIATE_TEST_CASE_P(Filter, Blur, testing::Combine(
+                        testing::ValuesIn(devices()), 
+                        testing::Values(3, 5, 7), 
+                        testing::Values(3, 5, 7)));
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// sobel
+
+struct Sobel : FilterTest, testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, std::pair<int, int> > >
 {
-    CV_GpuNppImageBlurTest() : CV_GpuNppFilterTest( "GPU-NppImageBlur", "blur" ) {}
+    cv::gpu::DeviceInfo devInfo;
+    int ksize;
+    int dx, dy;
+
+    cv::Mat dst_gold_rgba;
+    cv::Mat dst_gold_gray;
 
-    int test(const Mat& img)
+    using FilterTest::SetUpTestCase;
+    using FilterTest::TearDownTestCase;
+    
+    virtual void SetUp()
     {
-        int ksizes[] = {3, 5, 7};
-        int ksizes_num = sizeof(ksizes) / sizeof(int);
+        devInfo = std::tr1::get<0>(GetParam());
+        ksize = std::tr1::get<1>(GetParam());
+        std::pair<int, int> d = std::tr1::get<2>(GetParam());
+        dx = d.first; dy = d.second;
 
-        int test_res = cvtest::TS::OK;
+        cv::gpu::setDevice(devInfo.deviceID());
+        
+        cv::Sobel(img_rgba, dst_gold_rgba, -1, dx, dy, ksize);
+        cv::Sobel(img_gray, dst_gold_gray, -1, dx, dy, ksize);
+    }
+};
+
+TEST_P(Sobel, Accuracy)
+{
+    ASSERT_TRUE(!img_rgba.empty() && !img_gray.empty());
+
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(ksize);
+    PRINT_PARAM(dx);
+    PRINT_PARAM(dy);
+
+    cv::Mat dst_rgba;
+    cv::Mat dst_gray;
 
-        for (int i = 0; i < ksizes_num; ++i)
-        {
-            for (int j = 0; j < ksizes_num; ++j)
-            {
-                Size ksize(ksizes[i], ksizes[j]);
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_rgba;
+        cv::gpu::GpuMat dev_dst_gray;
 
-                ts->printf(cvtest::TS::LOG, "\nksize = (%dx%d)\n", ksizes[i], ksizes[j]);
+        cv::gpu::Sobel(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, -1, dx, dy, ksize);
+        cv::gpu::Sobel(cv::gpu::GpuMat(img_gray), dev_dst_gray, -1, dx, dy, ksize);
 
-                Mat cpudst;
-                cv::blur(img, cpudst, ksize);
+        dev_dst_rgba.download(dst_rgba);
+        dev_dst_gray.download(dst_gray);
+    );
 
-                GpuMat gpu1(img);
-                GpuMat gpudst;
-                cv::gpu::blur(gpu1, gpudst, ksize);
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, ksize, 0.0);
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, ksize, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(Filter, Sobel, testing::Combine(
+                        testing::ValuesIn(devices()), 
+                        testing::Values(3, 5, 7), 
+                        testing::Values(std::make_pair(1, 0), std::make_pair(0, 1), std::make_pair(1, 1), std::make_pair(2, 0), std::make_pair(2, 1), std::make_pair(0, 2), std::make_pair(1, 2), std::make_pair(2, 2))));
 
-                if (CheckNorm(cpudst, gpudst, ksize) != cvtest::TS::OK)
-                    test_res = cvtest::TS::FAIL_GENERIC;
-            }
-        }
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// scharr
+
+struct Scharr : FilterTest, testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, std::pair<int, int> > >
+{
+    cv::gpu::DeviceInfo devInfo;
+    int dx, dy;
 
-        return test_res;
+    cv::Mat dst_gold_rgba;
+    cv::Mat dst_gold_gray;
+
+    using FilterTest::SetUpTestCase;
+    using FilterTest::TearDownTestCase;
+    
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        std::pair<int, int> d = std::tr1::get<1>(GetParam());
+        dx = d.first; dy = d.second;
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::Scharr(img_rgba, dst_gold_rgba, -1, dx, dy);
+        cv::Scharr(img_gray, dst_gold_gray, -1, dx, dy);
     }
 };
 
-////////////////////////////////////////////////////////////////////////////////
-// Sobel
-struct CV_GpuNppImageSobelTest : public CV_GpuNppFilterTest
+TEST_P(Scharr, Accuracy)
 {
-    CV_GpuNppImageSobelTest() : CV_GpuNppFilterTest( "GPU-NppImageSobel", "Sobel" ) {}
+    ASSERT_TRUE(!img_rgba.empty() && !img_gray.empty());
 
-    int test(const Mat& img)
-    {
-        int ksizes[] = {3, 5, 7};
-        int ksizes_num = sizeof(ksizes) / sizeof(int);
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(dx);
+    PRINT_PARAM(dy);
 
-        int dx = 1, dy = 0;
+    cv::Mat dst_rgba;
+    cv::Mat dst_gray;
 
-        int test_res = cvtest::TS::OK;
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_rgba;
+        cv::gpu::GpuMat dev_dst_gray;
 
-        for (int i = 0; i < ksizes_num; ++i)
-        {
-            ts->printf(cvtest::TS::LOG, "\nksize = %d\n", ksizes[i]);
+        cv::gpu::Scharr(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, -1, dx, dy);
+        cv::gpu::Scharr(cv::gpu::GpuMat(img_gray), dev_dst_gray, -1, dx, dy);
 
-            Mat cpudst;
-            cv::Sobel(img, cpudst, -1, dx, dy, ksizes[i]);
+        dev_dst_rgba.download(dst_rgba);
+        dev_dst_gray.download(dst_gray);
+    );
 
-            GpuMat gpu1(img);
-            GpuMat gpudst;
-            cv::gpu::Sobel(gpu1, gpudst, -1, dx, dy, ksizes[i]);
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, 3, 0.0);
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, 3, 0.0);
+}
 
-            if (CheckNorm(cpudst, gpudst, Size(ksizes[i], ksizes[i])) != cvtest::TS::OK)
-                test_res = cvtest::TS::FAIL_GENERIC;
-        }
+INSTANTIATE_TEST_CASE_P(Filter, Scharr, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(std::make_pair(1, 0), std::make_pair(0, 1))));
 
-        return test_res;
-    }
-};
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// gaussianBlur
 
-////////////////////////////////////////////////////////////////////////////////
-// Scharr
-struct CV_GpuNppImageScharrTest : public CV_GpuNppFilterTest
+struct GaussianBlur : FilterTest, testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
 {
-    CV_GpuNppImageScharrTest() : CV_GpuNppFilterTest( "GPU-NppImageScharr", "Scharr" ) {}
+    cv::gpu::DeviceInfo devInfo;
+    cv::Size ksize;
+
+    double sigma1, sigma2;
 
-    int test(const Mat& img)
+    cv::Mat dst_gold_rgba;
+    cv::Mat dst_gold_gray;
+
+    using FilterTest::SetUpTestCase;
+    using FilterTest::TearDownTestCase;
+    
+    virtual void SetUp()
     {
-        int dx = 1, dy = 0;
+        devInfo = std::tr1::get<0>(GetParam());
+        ksize = cv::Size(std::tr1::get<1>(GetParam()), std::tr1::get<2>(GetParam()));
 
-        Mat cpudst;
-        cv::Scharr(img, cpudst, -1, dx, dy);
+        cv::gpu::setDevice(devInfo.deviceID());
+        
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
 
-        GpuMat gpu1(img);
-        GpuMat gpudst;
-        cv::gpu::Scharr(gpu1, gpudst, -1, dx, dy);
-                
-        return CheckNorm(cpudst, gpudst, Size(3, 3));
+        sigma1 = rng.uniform(0.1, 1.0); 
+        sigma2 = rng.uniform(0.1, 1.0);
+        
+        cv::GaussianBlur(img_rgba, dst_gold_rgba, ksize, sigma1, sigma2);
+        cv::GaussianBlur(img_gray, dst_gold_gray, ksize, sigma1, sigma2);
     }
 };
 
-
-////////////////////////////////////////////////////////////////////////////////
-// GaussianBlur
-struct CV_GpuNppImageGaussianBlurTest : public CV_GpuNppFilterTest
+TEST_P(GaussianBlur, Accuracy)
 {
-    CV_GpuNppImageGaussianBlurTest() : CV_GpuNppFilterTest( "GPU-NppImageGaussianBlur", "GaussianBlur" ) {}
+    ASSERT_TRUE(!img_rgba.empty() && !img_gray.empty());
 
-    int test(const Mat& img)
-    {
-        int ksizes[] = {3, 5, 7};
-        int ksizes_num = sizeof(ksizes) / sizeof(int);
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(ksize);
+    PRINT_PARAM(sigma1);
+    PRINT_PARAM(sigma2);
 
-        int test_res = cvtest::TS::OK;
+    cv::Mat dst_rgba;
+    cv::Mat dst_gray;
 
-        const double sigma1 = 3.0;
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_rgba;
+        cv::gpu::GpuMat dev_dst_gray;
 
-        for (int i = 0; i < ksizes_num; ++i)
-        {
-            for (int j = 0; j < ksizes_num; ++j)
-            {
-                cv::Size ksize(ksizes[i], ksizes[j]);
+        cv::gpu::GaussianBlur(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, ksize, sigma1, sigma2);
+        cv::gpu::GaussianBlur(cv::gpu::GpuMat(img_gray), dev_dst_gray, ksize, sigma1, sigma2);
 
-                ts->printf(cvtest::TS::LOG, "ksize = (%dx%d)\t\n", ksizes[i], ksizes[j]);
+        dev_dst_rgba.download(dst_rgba);
+        dev_dst_gray.download(dst_gray);
+    );
 
-                Mat cpudst;
-                cv::GaussianBlur(img, cpudst, ksize, sigma1);
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, ksize, 3.0);
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, ksize, 3.0);
+}
+
+INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, testing::Combine(
+                        testing::ValuesIn(devices()), 
+                        testing::Values(3, 5, 7), 
+                        testing::Values(3, 5, 7)));
 
-                GpuMat gpu1(img);
-                GpuMat gpudst;
-                cv::gpu::GaussianBlur(gpu1, gpudst, ksize, sigma1);
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// laplacian
+
+struct Laplacian : FilterTest, testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+{
+    cv::gpu::DeviceInfo devInfo;
+    int ksize;
+
+    cv::Mat dst_gold_rgba;
+    cv::Mat dst_gold_gray;
+
+    using FilterTest::SetUpTestCase;
+    using FilterTest::TearDownTestCase;
+    
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        ksize = std::tr1::get<1>(GetParam());
 
-                if (CheckNorm(cpudst, gpudst, ksize) != cvtest::TS::OK)
-                    test_res = cvtest::TS::FAIL_GENERIC;
-            }
-        }
+        cv::gpu::setDevice(devInfo.deviceID());
 
-        return test_res;
+        cv::Laplacian(img_rgba, dst_gold_rgba, -1, ksize);
+        cv::Laplacian(img_gray, dst_gold_gray, -1, ksize);
     }
 };
 
-////////////////////////////////////////////////////////////////////////////////
-// Laplacian
-struct CV_GpuNppImageLaplacianTest : public CV_GpuNppFilterTest
+TEST_P(Laplacian, Accuracy)  // compares cv::gpu::Laplacian output with the cv::Laplacian reference from SetUp()
 {
-    CV_GpuNppImageLaplacianTest() : CV_GpuNppFilterTest( "GPU-NppImageLaplacian", "Laplacian" ) {}
+    ASSERT_TRUE(!img_rgba.empty() && !img_gray.empty());  // inputs are not defined in this block (presumably FilterTest statics - confirm)
 
-    int test(const Mat& img)
-    {
-        int ksizes[] = {1, 3};
-        int ksizes_num = sizeof(ksizes) / sizeof(int);
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(ksize);
 
-        int test_res = cvtest::TS::OK;
+    cv::Mat dst_rgba;
+    cv::Mat dst_gray;
 
-        for (int i = 0; i < ksizes_num; ++i)
-        {
-            ts->printf(cvtest::TS::LOG, "\nksize = %d\n", ksizes[i]);
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_rgba;
+        cv::gpu::GpuMat dev_dst_gray;
 
-            Mat cpudst;
-            cv::Laplacian(img, cpudst, -1, ksizes[i]);
+        cv::gpu::Laplacian(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, -1, ksize);
+        cv::gpu::Laplacian(cv::gpu::GpuMat(img_gray), dev_dst_gray, -1, ksize);
 
-            GpuMat gpu1(img);
-            GpuMat gpudst;
-            cv::gpu::Laplacian(gpu1, gpudst, -1, ksizes[i]);
+        dev_dst_rgba.download(dst_rgba);
+        dev_dst_gray.download(dst_gray);
+    );
 
-            if (CheckNorm(cpudst, gpudst, Size(3, 3)) != cvtest::TS::OK)
-                test_res = cvtest::TS::FAIL_GENERIC;
-        }
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, 3, 0.0);  // NOTE(review): 3 is used even when ksize == 1, mirroring the replaced Size(3, 3) - confirm intended
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, 3, 0.0);
+}
 
-        return test_res;
-    }
-};
+INSTANTIATE_TEST_CASE_P(Filter, Laplacian, testing::Combine(
+                        testing::ValuesIn(devices()),    // every device returned by devices()
+                        testing::Values(1, 3)));         // ksize values under test
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// erode
 
-////////////////////////////////////////////////////////////////////////////////
-// Erode
-class CV_GpuErodeTest : public CV_GpuNppFilterTest
+struct Erode : FilterTest, testing::TestWithParam<cv::gpu::DeviceInfo>  // fixture: one instance per device, CPU reference computed in SetUp()
 {
-public:
-    CV_GpuErodeTest() : CV_GpuNppFilterTest( "GPU-NppErode", "erode" ) {} 
+    cv::gpu::DeviceInfo devInfo;
+
+    cv::Mat kernel;  // structuring element; set to a 3x3 CV_8U matrix of ones in SetUp()
 
-protected:
-	virtual int test(const Mat& img)
+    cv::Mat dst_gold_rgba;  // cv::erode result for img_rgba
+    cv::Mat dst_gold_gray;  // cv::erode result for img_gray
+
+    using FilterTest::SetUpTestCase;
+    using FilterTest::TearDownTestCase;
+    // Selects the device under test, builds the kernel and computes the CPU reference outputs.
+    virtual void SetUp()
     {
-        Mat kernel(Mat::ones(3, 3, CV_8U));
+        devInfo = GetParam();
 
-	    cv::Mat cpuRes;
-        cv::erode(img, cpuRes, kernel);
+        cv::gpu::setDevice(devInfo.deviceID());
 
-	    GpuMat gpuRes;
-        cv::gpu::erode(GpuMat(img), gpuRes, kernel);
+        kernel = cv::Mat::ones(3, 3, CV_8U);
 
-	    return CheckNorm(cpuRes, gpuRes, Size(3, 3));
+        cv::erode(img_rgba, dst_gold_rgba, kernel);
+        cv::erode(img_gray, dst_gold_gray, kernel);
     }
 };
 
-////////////////////////////////////////////////////////////////////////////////
-// Dilate
-class CV_GpuDilateTest : public CV_GpuNppFilterTest
+TEST_P(Erode, Accuracy)  // compares cv::gpu::erode output with the cv::erode reference from SetUp()
+{
+    ASSERT_TRUE(!img_rgba.empty() && !img_gray.empty());  // inputs are not defined in this block (presumably FilterTest statics - confirm)
+
+    PRINT_PARAM(devInfo);
+
+    cv::Mat dst_rgba;
+    cv::Mat dst_gray;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_rgba;
+        cv::gpu::GpuMat dev_dst_gray;
+
+        cv::gpu::erode(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, kernel);
+        cv::gpu::erode(cv::gpu::GpuMat(img_gray), dev_dst_gray, kernel);
+
+        dev_dst_rgba.download(dst_rgba);
+        dev_dst_gray.download(dst_gray);
+    );
+
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, 3, 0.0);  // 3 matches the 3x3 kernel built in SetUp()
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, 3, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(Filter, Erode, testing::ValuesIn(devices()));  // one instance per device returned by devices()
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// dilate
+
+struct Dilate : FilterTest, testing::TestWithParam<cv::gpu::DeviceInfo>  // fixture: one instance per device, CPU reference computed in SetUp()
 {
-public:
-    CV_GpuDilateTest() : CV_GpuNppFilterTest( "GPU-NppDilate", "dilate" ) {} 
+    cv::gpu::DeviceInfo devInfo;
+
+    cv::Mat kernel;  // structuring element; set to a 3x3 CV_8U matrix of ones in SetUp()
 
-protected:
-	virtual int test(const Mat& img)
+    cv::Mat dst_gold_rgba;  // cv::dilate result for img_rgba
+    cv::Mat dst_gold_gray;  // cv::dilate result for img_gray
+
+    using FilterTest::SetUpTestCase;
+    using FilterTest::TearDownTestCase;
+    // Selects the device under test, builds the kernel and computes the CPU reference outputs.
+    virtual void SetUp()
     {
-        Mat kernel(Mat::ones(3, 3, CV_8U));
+        devInfo = GetParam();
 
-	    cv::Mat cpuRes;
-        cv::dilate(img, cpuRes, kernel);
+        cv::gpu::setDevice(devInfo.deviceID());
 
-	    GpuMat gpuRes;
-        cv::gpu::dilate(GpuMat(img), gpuRes, kernel);
-	
-	    return CheckNorm(cpuRes, gpuRes, Size(3, 3));
+        kernel = cv::Mat::ones(3, 3, CV_8U);
+
+        cv::dilate(img_rgba, dst_gold_rgba, kernel);
+        cv::dilate(img_gray, dst_gold_gray, kernel);
     }
 };
 
-////////////////////////////////////////////////////////////////////////////////
-// MorphologyEx
-class CV_GpuMorphExTest : public CV_GpuNppFilterTest
+TEST_P(Dilate, Accuracy)  // compares cv::gpu::dilate output with the cv::dilate reference from SetUp()
 {
-public:
-    CV_GpuMorphExTest() : CV_GpuNppFilterTest( "GPU-NppMorphologyEx", "morphologyEx" ) {} 
+    ASSERT_TRUE(!img_rgba.empty() && !img_gray.empty());  // inputs are not defined in this block (presumably FilterTest statics - confirm)
 
-protected:
-	virtual int test(const Mat& img)
-    {
-        static int ops[] = { MORPH_OPEN, CV_MOP_CLOSE, CV_MOP_GRADIENT, CV_MOP_TOPHAT, CV_MOP_BLACKHAT};
-        const char *names[] = { "MORPH_OPEN", "CV_MOP_CLOSE", "CV_MOP_GRADIENT", "CV_MOP_TOPHAT", "CV_MOP_BLACKHAT"};
-        int num = sizeof(ops)/sizeof(ops[0]);
+    PRINT_PARAM(devInfo);
+
+    cv::Mat dst_rgba;
+    cv::Mat dst_gray;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_rgba;
+        cv::gpu::GpuMat dev_dst_gray;
+
+        cv::gpu::dilate(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, kernel);
+        cv::gpu::dilate(cv::gpu::GpuMat(img_gray), dev_dst_gray, kernel);
 
-        GpuMat kernel(Mat::ones(3, 3, CV_8U));
+        dev_dst_rgba.download(dst_rgba);
+        dev_dst_gray.download(dst_gray);
+    );
 
-        int res = cvtest::TS::OK;
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, 3, 0.0);  // 3 matches the 3x3 kernel built in SetUp()
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, 3, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(Filter, Dilate, testing::ValuesIn(devices()));  // one instance per device returned by devices()
 
-        for(int i = 0; i < num; ++i)
-        {
-            ts->printf(cvtest::TS::LOG, "Tesing %s\n", names[i]);
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// morphEx
 
-	        cv::Mat cpuRes;
-            cv::morphologyEx(img, cpuRes, ops[i], (Mat)kernel);
+static const int morphOps[] = {cv::MORPH_OPEN, CV_MOP_CLOSE, CV_MOP_GRADIENT, CV_MOP_TOPHAT, CV_MOP_BLACKHAT};  // operations under test, selected by index
+static const char* morphOps_str[] = {"MORPH_OPEN", "MOP_CLOSE", "MOP_GRADIENT", "MOP_TOPHAT", "MOP_BLACKHAT"};  // printable names, index-aligned with morphOps
+
+struct MorphEx : FilterTest, testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+{   // Fixture parameterized by (device, index into morphOps); SetUp() computes the CPU reference outputs.
+    cv::gpu::DeviceInfo devInfo;
+    int morphOpsIdx;  // index into morphOps / morphOps_str
 
-	        GpuMat gpuRes;
-            cv::gpu::morphologyEx(GpuMat(img), gpuRes, ops[i], kernel);
+    cv::Mat kernel;  // structuring element; set to a 3x3 CV_8U matrix of ones in SetUp()
 
-            if (cvtest::TS::OK != CheckNorm(cpuRes, gpuRes, Size(4, 4)))
-                res = cvtest::TS::FAIL_GENERIC;
-        }
-        return res;
+    cv::Mat dst_gold_rgba;  // cv::morphologyEx result for img_rgba
+    cv::Mat dst_gold_gray;  // cv::morphologyEx result for img_gray
+
+    using FilterTest::SetUpTestCase;
+    using FilterTest::TearDownTestCase;
+    // Unpacks the test parameters, selects the device, builds the kernel and computes the CPU reference outputs.
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        morphOpsIdx = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        kernel = cv::Mat::ones(3, 3, CV_8U);
+
+        cv::morphologyEx(img_rgba, dst_gold_rgba, morphOps[morphOpsIdx], kernel);
+        cv::morphologyEx(img_gray, dst_gold_gray, morphOps[morphOpsIdx], kernel);
     }
 };
 
+TEST_P(MorphEx, Accuracy)  // compares cv::gpu::morphologyEx output with the cv::morphologyEx reference from SetUp()
+{
+    ASSERT_TRUE(!img_rgba.empty() && !img_gray.empty());  // inputs are not defined in this block (presumably FilterTest statics - confirm)
+
+    const char* morphOpStr = morphOps_str[morphOpsIdx];  // printable name of the operation, for the trace only
+
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(morphOpStr);
+
+    cv::Mat dst_rgba;
+    cv::Mat dst_gray;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst_rgba;
+        cv::gpu::GpuMat dev_dst_gray;
+
+        cv::gpu::morphologyEx(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, morphOps[morphOpsIdx], cv::gpu::GpuMat(kernel));
+        cv::gpu::morphologyEx(cv::gpu::GpuMat(img_gray), dev_dst_gray, morphOps[morphOpsIdx], cv::gpu::GpuMat(kernel));
+
+        dev_dst_rgba.download(dst_rgba);
+        dev_dst_gray.download(dst_gray);
+    );
+
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_rgba, dst_rgba, 4, 0.0);  // NOTE(review): 4 (not 3) mirrors the replaced test's Size(4, 4) - confirm intended
+    EXPECT_MAT_NEAR_KSIZE(dst_gold_gray, dst_gray, 4, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(Filter, MorphEx, testing::Combine(
+                        testing::ValuesIn(devices()),    // every device returned by devices()
+                        testing::Range(0, 5)));          // indices 0..4 into morphOps / morphOps_str (5 entries each); keep in sync
 
-TEST(blur, accuracy) { CV_GpuNppImageBlurTest test; test.safe_run(); }
-TEST(sobel, accuracy) { CV_GpuNppImageSobelTest test; test.safe_run(); }
-TEST(scharr, accuracy) { CV_GpuNppImageScharrTest test; test.safe_run(); }
-TEST(gaussianBlur, accuracy) { CV_GpuNppImageGaussianBlurTest test; test.safe_run(); }
-TEST(laplcaian, accuracy) { CV_GpuNppImageLaplacianTest test; test.safe_run(); }
-TEST(erode, accuracy) { CV_GpuErodeTest test; test.safe_run(); }
-TEST(dilate, accuracy) { CV_GpuDilateTest test; test.safe_run(); }
-TEST(morphEx, accuracy) { CV_GpuMorphExTest test;  test.safe_run(); }
+#endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_gpu_base.cpp b/modules/gpu/test/test_gpu_base.cpp
new file mode 100644
index 0000000000..f035d7a3ca
--- /dev/null
+++ b/modules/gpu/test/test_gpu_base.cpp
@@ -0,0 +1,154 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature)
+{   // Usable only when the gpu module was built with the feature AND the device supports it.
+    return cv::gpu::TargetArchs::builtWith(feature) ? info.supports(feature) : false;
+}
+
+const std::vector<cv::gpu::DeviceInfo>& devices()
+{
+    // Cache of the CUDA devices accepted by DeviceInfo::isCompatible(); it is
+    // filled on the first call and returned unchanged on every later call.
+    static std::vector<cv::gpu::DeviceInfo> compatible;
+    static bool initialized = false;
+
+    if (initialized)
+        return compatible;
+
+    const int count = cv::gpu::getCudaEnabledDeviceCount();
+    compatible.reserve(count);
+
+    for (int id = 0; id < count; ++id)
+    {
+        cv::gpu::DeviceInfo candidate(id);
+        if (candidate.isCompatible())
+            compatible.push_back(candidate);
+    }
+
+    initialized = true;
+    return compatible;
+}
+
+std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature)
+{
+    // Subset of devices() that supports `feature`; the result is empty when
+    // the gpu module was not built with that feature.
+    const std::vector<cv::gpu::DeviceInfo>& all = devices();
+    std::vector<cv::gpu::DeviceInfo> matching;
+
+    if (!cv::gpu::TargetArchs::builtWith(feature))
+        return matching;
+
+    matching.reserve(all.size());
+
+    typedef std::vector<cv::gpu::DeviceInfo>::const_iterator DeviceIt;
+    for (DeviceIt it = all.begin(); it != all.end(); ++it)
+    {
+        if (it->supports(feature))
+            matching.push_back(*it);
+    }
+
+    return matching;
+}
+
+std::vector<int> types(int depth_start, int depth_end, int cn_start, int cn_end)
+{
+    // Both ranges are inclusive. An inverted range yields an empty vector: a negative
+    // element count would convert to a huge size_t in reserve() and throw std::length_error.
+    std::vector<int> v;
+    if (depth_end < depth_start || cn_end < cn_start)
+        return v;
+
+    v.reserve(static_cast<size_t>(depth_end - depth_start + 1) * (cn_end - cn_start + 1));
+
+    for (int depth = depth_start; depth <= depth_end; ++depth)
+        for (int cn = cn_start; cn <= cn_end; ++cn)
+            v.push_back(CV_MAKETYPE(depth, cn));
+
+    return v;
+}
+
+const std::vector<int>& all_types()
+{   // Built once on first use: every depth CV_8U..CV_64F combined with 1..4 channels.
+    static const std::vector<int> everyType(types(CV_8U, CV_64F, 1, 4));
+    return everyType;
+}
+
+cv::Mat readImage(const std::string& fileName, int flags)
+{   // fileName is appended to the data path reported by cvtest::TS before loading.
+    return cv::imread(std::string(cvtest::TS::ptr()->get_data_path()).append(fileName), flags);
+}
+
+double checkNorm(const cv::Mat& m1, const cv::Mat& m2)
+{   // Returns cv::norm(m1, m2, cv::NORM_INF), the value EXPECT_MAT_NEAR compares against its eps.
+    return cv::norm(m1, m2, cv::NORM_INF);
+}
+
+double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2)
+{   // Distance from 1 of the CV_TM_CCORR_NORMED matchTemplate response at (0, 0).
+    cv::Mat response;
+    cv::matchTemplate(m1, m2, response, CV_TM_CCORR_NORMED);
+    return std::abs(1.f - response.at<float>(0, 0));
+}
+
+namespace cv
+{
+    std::ostream& operator << (std::ostream& os, const Size& sz)  // prints "<width>x<height>"
+    {
+        return os << sz.width << "x" << sz.height;
+    }
+
+    std::ostream& operator << (std::ostream& os, const Scalar& s)  // prints "[v0, v1, v2, v3]"
+    {
+        for (int i = 0; i < 4; ++i) os << (i == 0 ? "[" : ", ") << s[i];
+        return os << "]";
+    }
+    namespace gpu
+    {
+        std::ostream& operator << (std::ostream& os, const DeviceInfo& info)  // prints info.name() only
+        {
+            return os << info.name();
+        }
+    }
+}
diff --git a/modules/gpu/test/test_gpu_base.hpp b/modules/gpu/test/test_gpu_base.hpp
new file mode 100644
index 0000000000..25bf163a3d
--- /dev/null
+++ b/modules/gpu/test/test_gpu_base.hpp
@@ -0,0 +1,103 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_TEST_GPU_BASE_HPP__
+#define __OPENCV_TEST_GPU_BASE_HPP__
+
+//! return true if the device supports the specified feature and the gpu module was built with support for it.
+bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
+
+//! return all devices compatible with the current gpu module build.
+const std::vector<cv::gpu::DeviceInfo>& devices();
+//! return all devices compatible with the current gpu module build which support the specified feature.
+std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature);
+
+//! return a vector with the types from the specified (inclusive) depth and channel ranges.
+std::vector<int> types(int depth_start, int depth_end, int cn_start, int cn_end);
+
+//! return a vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
+const std::vector<int>& all_types();
+
+//! read an image from the testdata folder.
+cv::Mat readImage(const std::string& fileName, int flags = CV_LOAD_IMAGE_COLOR);
+
+double checkNorm(const cv::Mat& m1, const cv::Mat& m2);        //!< returns cv::norm(m1, m2, cv::NORM_INF)
+double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2);  //!< returns |r - 1|, r = CV_TM_CCORR_NORMED matchTemplate response at (0, 0)
+
+#define OSTR_NAME(suf) ostr_ ## suf  // name of the local ostringstream declared by PRINT_PARAM / PRINT_TYPE
+// Adds "<name>: <value>" to the gtest trace of the enclosing scope. Declares a local, so use each name once per scope.
+#define PRINT_PARAM(name) \
+        std::ostringstream OSTR_NAME(name); \
+        OSTR_NAME(name) << # name << ": " << name; \
+        SCOPED_TRACE(OSTR_NAME(name).str());
+// Like PRINT_PARAM, but prints cvtest::getTypeName(type) followed by "c" and CV_MAT_CN(type) instead of the raw value.
+#define PRINT_TYPE(type) \
+        std::ostringstream OSTR_NAME(type); \
+        OSTR_NAME(type) << # type << ": " << cvtest::getTypeName(type) << "c" << CV_MAT_CN(type); \
+        SCOPED_TRACE(OSTR_NAME(type).str());
+
+#define EXPECT_MAT_NEAR(mat1, mat2, eps) /* fatal on type/size mismatch; non-fatal if checkNorm(mat1, mat2) > eps. Arguments are evaluated more than once. */ \
+    { \
+        ASSERT_EQ((mat1).type(), (mat2).type()); \
+        ASSERT_EQ((mat1).size(), (mat2).size()); \
+        EXPECT_LE(checkNorm((mat1), (mat2)), (eps)); \
+    }
+
+#define EXPECT_MAT_SIMILAR(mat1, mat2, eps) /* fatal on type/size mismatch; non-fatal if checkSimilarity(mat1, mat2) > eps. Arguments are evaluated more than once. */ \
+    { \
+        ASSERT_EQ((mat1).type(), (mat2).type()); \
+        ASSERT_EQ((mat1).size(), (mat2).size()); \
+        EXPECT_LE(checkSimilarity((mat1), (mat2)), (eps)); \
+    }
+
+
+//! stream output operators, so that gtest ASSERT/EXPECT macros can print values of these types
+namespace cv
+{
+    std::ostream& operator << (std::ostream& os, const Size& sz);
+    std::ostream& operator << (std::ostream& os, const Scalar& s);
+    namespace gpu
+    {
+        std::ostream& operator << (std::ostream& os, const DeviceInfo& info);
+    }
+}
+
+#endif // __OPENCV_TEST_GPU_BASE_HPP__
diff --git a/modules/gpu/test/test_hog.cpp b/modules/gpu/test/test_hog.cpp
index c6089b0766..2fd061c972 100644
--- a/modules/gpu/test/test_hog.cpp
+++ b/modules/gpu/test/test_hog.cpp
@@ -40,32 +40,24 @@
 //M*/
 
 #include "test_precomp.hpp"
-#include <fstream>
 
-using namespace std;
+#ifdef HAVE_CUDA
 
 //#define DUMP
 
-#define CHECK(pred, err) if (!(pred)) { \
-    ts->printf(cvtest::TS::CONSOLE, "Fail: \"%s\" at line: %d\n", #pred, __LINE__); \
-    ts->set_failed_test_info(err); \
-    return; }
-
-struct CV_GpuHogDetectTestRunner: cv::gpu::HOGDescriptor
+struct CV_GpuHogDetectTestRunner : cv::gpu::HOGDescriptor
 {
-    CV_GpuHogDetectTestRunner(cvtest::TS* ts_): ts(ts_) {}
-
-    void run(int) 
+    void run() 
     {       
-        cv::Mat img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/road.png");
-        CHECK(!img_rgb.empty(), cvtest::TS::FAIL_MISSING_TEST_DATA);
+        cv::Mat img_rgb = readImage("hog/road.png");
+        ASSERT_TRUE(!img_rgb.empty());
 
 #ifdef DUMP
-        f.open((std::string(ts->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
-        CHECK(f.is_open(), cvtest::TS::FAIL_GENERIC);
+        f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
+        ASSERT_TRUE(f.is_open());
 #else
-        f.open((std::string(ts->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
-        CHECK(f.is_open(), cvtest::TS::FAIL_MISSING_TEST_DATA);
+        f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
+        ASSERT_TRUE(f.is_open());
 #endif
 
         // Test on color image
@@ -78,7 +70,6 @@ struct CV_GpuHogDetectTestRunner: cv::gpu::HOGDescriptor
         test(img);
 
         f.close();
-
     }
 
 #ifdef DUMP
@@ -107,24 +98,24 @@ struct CV_GpuHogDetectTestRunner: cv::gpu::HOGDescriptor
 
         f.read((char*)&rows, sizeof(rows));
         f.read((char*)&cols, sizeof(cols));
-        CHECK(rows == block_hists.rows, cvtest::TS::FAIL_INVALID_OUTPUT);
-        CHECK(cols == block_hists.cols, cvtest::TS::FAIL_INVALID_OUTPUT);
+        ASSERT_EQ(rows, block_hists.rows);
+        ASSERT_EQ(cols, block_hists.cols);
         for (int i = 0; i < block_hists.rows; ++i)
         {
             for (int j = 0; j < block_hists.cols; ++j)
             {
                 float val;
                 f.read((char*)&val, sizeof(val));
-                CHECK(fabs(val - block_hists.at<float>(i, j)) < 1e-3f, cvtest::TS::FAIL_INVALID_OUTPUT);
+                ASSERT_NEAR(val, block_hists.at<float>(i, j), 1e-3);
             }
         }
         f.read((char*)&nlocations, sizeof(nlocations));
-        CHECK(nlocations == static_cast<int>(locations.size()), cvtest::TS::FAIL_INVALID_OUTPUT);
+        ASSERT_EQ(nlocations, static_cast<int>(locations.size()));
         for (int i = 0; i < nlocations; ++i)
         {
             cv::Point location;
             f.read((char*)&location, sizeof(location));
-            CHECK(location == locations[i], cvtest::TS::FAIL_INVALID_OUTPUT);
+            ASSERT_EQ(location, locations[i]);
         }
     }
 #endif
@@ -176,39 +167,47 @@ struct CV_GpuHogDetectTestRunner: cv::gpu::HOGDescriptor
 #else
     std::ifstream f;
 #endif
-
-    cvtest::TS* ts;
 };
 
-
-struct CV_GpuHogDetectTest: cvtest::BaseTest 
+struct HogDetect : testing::TestWithParam<cv::gpu::DeviceInfo>  // fixture: one instance per device
 {
-    CV_GpuHogDetectTest() {}
-
-    void run(int i)
+    cv::gpu::DeviceInfo devInfo;
+    // Stores the device parameter and makes it the current device for the test body.
+    virtual void SetUp()
     {
-        CV_GpuHogDetectTestRunner runner(ts);
-        runner.run(i);
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
     }
 };
 
-TEST(HOG, detect_accuracy) { CV_GpuHogDetectTest test; test.safe_run(); }
+TEST_P(HogDetect, Accuracy)
+{
+    PRINT_PARAM(devInfo);
+    // run() reports mismatches through its own ASSERT_* checks; here we additionally require that nothing throws.
+    ASSERT_NO_THROW(
+        CV_GpuHogDetectTestRunner runner;
+        runner.run();
+    );
+}
+
+INSTANTIATE_TEST_CASE_P(HOG, HogDetect, testing::ValuesIn(devices()));  // one instance per device returned by devices()
 
-struct CV_GpuHogGetDescriptorsTestRunner: cv::gpu::HOGDescriptor
+struct CV_GpuHogGetDescriptorsTestRunner : cv::gpu::HOGDescriptor
 {
-    CV_GpuHogGetDescriptorsTestRunner(cvtest::TS* ts_): HOGDescriptor(cv::Size(64, 128)), ts(ts_) {}
+    CV_GpuHogGetDescriptorsTestRunner(): cv::gpu::HOGDescriptor(cv::Size(64, 128)) {}
 
-    void run(int)
+    void run()
     {
         // Load image (e.g. train data, composed from windows)
-        cv::Mat img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/train_data.png");
-        CHECK(!img_rgb.empty(), cvtest::TS::FAIL_MISSING_TEST_DATA);
+        cv::Mat img_rgb = readImage("hog/train_data.png");
+        ASSERT_TRUE(!img_rgb.empty());
 
         // Convert to C4
         cv::Mat img;
         cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
-        cv::gpu::GpuMat d_img(img);
 
+        cv::gpu::GpuMat d_img(img);
 
         // Convert train images into feature vectors (train table)
         cv::gpu::GpuMat descriptors, descriptors_by_cols;
@@ -223,7 +222,7 @@ struct CV_GpuHogGetDescriptorsTestRunner: cv::gpu::HOGDescriptor
         block_hist_size = 36;
         cv::Size descr_size_expected = cv::Size(blocks_per_win_x * blocks_per_win_y * block_hist_size,
                                                 wins_per_img_x * wins_per_img_y);
-        CHECK(descriptors.size() == descr_size_expected, cvtest::TS::FAIL_INVALID_OUTPUT);
+        ASSERT_EQ(descr_size_expected, descriptors.size());
 
         // Check both formats of output descriptors are handled correctly
         cv::Mat dr(descriptors);
@@ -235,8 +234,8 @@ struct CV_GpuHogGetDescriptorsTestRunner: cv::gpu::HOGDescriptor
             for (int y = 0; y < blocks_per_win_y; ++y)
                 for (int x = 0; x < blocks_per_win_x; ++x)
                     for (int k = 0; k < block_hist_size; ++k)
-                        CHECK(l[(y * blocks_per_win_x + x) * block_hist_size + k] ==
-                              r[(x * blocks_per_win_y + y) * block_hist_size + k], cvtest::TS::FAIL_INVALID_OUTPUT);
+                        ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
+                                  r[(x * blocks_per_win_y + y) * block_hist_size + k]);
         }
 
         /* Now we want to extract the same feature vectors, but from single images. NOTE: results will
@@ -244,39 +243,39 @@ struct CV_GpuHogGetDescriptorsTestRunner: cv::gpu::HOGDescriptor
         wont't call getDescriptors and will use computeBlockHistograms instead of. computeBlockHistograms
         works good, it can be checked in the gpu_hog sample */
 
-        img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive1.png");
-        CHECK(!img_rgb.empty(), cvtest::TS::FAIL_MISSING_TEST_DATA);
+        img_rgb = readImage("hog/positive1.png");
+        ASSERT_TRUE(!img_rgb.empty());
         cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
         computeBlockHistograms(cv::gpu::GpuMat(img));
         // Everything is fine with interpolation for left top subimage
-        CHECK(cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)) == 0.f, cvtest::TS::FAIL_INVALID_OUTPUT);
+        ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));
 
-        img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive2.png");
-        CHECK(!img_rgb.empty(), cvtest::TS::FAIL_MISSING_TEST_DATA);
+        img_rgb = readImage("hog/positive2.png");
+        ASSERT_TRUE(!img_rgb.empty());
         cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
         computeBlockHistograms(cv::gpu::GpuMat(img));
         compare_inner_parts(block_hists, descriptors.rowRange(1, 2));
 
-        img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative1.png");
-        CHECK(!img_rgb.empty(), cvtest::TS::FAIL_MISSING_TEST_DATA);
+        img_rgb = readImage("hog/negative1.png");
+        ASSERT_TRUE(!img_rgb.empty());
         cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
         computeBlockHistograms(cv::gpu::GpuMat(img));
         compare_inner_parts(block_hists, descriptors.rowRange(2, 3));
 
-        img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative2.png");
-        CHECK(!img_rgb.empty(), cvtest::TS::FAIL_MISSING_TEST_DATA);
+        img_rgb = readImage("hog/negative2.png");
+        ASSERT_TRUE(!img_rgb.empty());
         cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
         computeBlockHistograms(cv::gpu::GpuMat(img));
         compare_inner_parts(block_hists, descriptors.rowRange(3, 4));
 
-        img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive3.png");
-        CHECK(!img_rgb.empty(), cvtest::TS::FAIL_MISSING_TEST_DATA);
+        img_rgb = readImage("hog/positive3.png");
+        ASSERT_TRUE(!img_rgb.empty());
         cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
         computeBlockHistograms(cv::gpu::GpuMat(img));
         compare_inner_parts(block_hists, descriptors.rowRange(4, 5));
 
-        img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative3.png");
-        CHECK(!img_rgb.empty(), cvtest::TS::FAIL_MISSING_TEST_DATA);
+        img_rgb = readImage("hog/negative3.png");
+        ASSERT_TRUE(!img_rgb.empty());
         cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
         computeBlockHistograms(cv::gpu::GpuMat(img));
         compare_inner_parts(block_hists, descriptors.rowRange(5, 6));
@@ -291,7 +290,7 @@ struct CV_GpuHogGetDescriptorsTestRunner: cv::gpu::HOGDescriptor
                 {
                     float a = d1.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
                     float b = d2.at<float>(0, (i * blocks_per_win_x + j) * block_hist_size);
-                    CHECK(a == b, cvtest::TS::FAIL_INVALID_OUTPUT)
+                    ASSERT_FLOAT_EQ(a, b);
                 }
     }
 
@@ -300,20 +299,30 @@ struct CV_GpuHogGetDescriptorsTestRunner: cv::gpu::HOGDescriptor
     int blocks_per_win_x;
     int blocks_per_win_y;
     int block_hist_size;
-
-    cvtest::TS* ts;
 };
 
-
-struct CV_GpuHogGetDescriptorsTest: cvtest::BaseTest 
+struct HogGetDescriptors : testing::TestWithParam<cv::gpu::DeviceInfo>  // fixture: one instance per device
 {
-    CV_GpuHogGetDescriptorsTest() {}
-
-    void run(int i)
+    cv::gpu::DeviceInfo devInfo;
+    // Stores the device parameter and makes it the current device for the test body.
+    virtual void SetUp()
     {
-        CV_GpuHogGetDescriptorsTestRunner runner(ts);
-        runner.run(i);
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
     }
 };
 
-TEST(HOG, descriptors_accuracy) { CV_GpuHogGetDescriptorsTest test; test.safe_run(); }
+TEST_P(HogGetDescriptors, Accuracy)
+{
+    PRINT_PARAM(devInfo);
+    // run() reports mismatches through its own ASSERT_* checks; here we additionally require that nothing throws.
+    ASSERT_NO_THROW(
+        CV_GpuHogGetDescriptorsTestRunner runner;
+        runner.run();
+    );
+}
+
+INSTANTIATE_TEST_CASE_P(HOG, HogGetDescriptors, testing::ValuesIn(devices()));  // one instance per device returned by devices()
+
+#endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp
new file mode 100644
index 0000000000..219b73e4ab
--- /dev/null
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -0,0 +1,2137 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// threshold
+
+struct Threshold : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+{
+    // Test parameters: GPU device, matrix type and cv::THRESH_* operation.
+    cv::gpu::DeviceInfo devInfo;
+    int type, threshOp;
+
+    // Random input and threshold arguments produced in SetUp().
+    cv::Size size;
+    cv::Mat src;
+    double maxVal, thresh;
+
+    cv::Mat dst_gold; // result of the CPU cv::threshold
+
+    virtual void SetUp()
+    {
+        const std::tr1::tuple<cv::gpu::DeviceInfo, int, int>& params = GetParam();
+        devInfo = std::tr1::get<0>(params);
+        type = std::tr1::get<1>(params);
+        threshOp = std::tr1::get<2>(params);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        // All draws below come from one generator, so their order fixes the test data.
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
+        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
+        maxVal = rng.uniform(20.0, 127.0);
+        thresh = rng.uniform(0.0, maxVal); // drawn with maxVal as the upper limit
+
+        // Host-side reference the GPU output is compared against.
+        cv::threshold(src, dst_gold, thresh, maxVal, threshOp);
+    }
+};
+
+TEST_P(Threshold, Accuracy) // cv::gpu::threshold is checked against the CPU cv::threshold result
+{
+    static const char* ops[] = {"THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC", "THRESH_TOZERO", "THRESH_TOZERO_INV"};
+    const char* threshOpStr = ops[threshOp]; // threshOp indexes the table directly, so the names must follow the numeric cv::THRESH_* order
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+    PRINT_PARAM(threshOpStr);
+    PRINT_PARAM(maxVal);
+    PRINT_PARAM(thresh);
+
+    cv::Mat dst;
+    // Run the GPU version on a GpuMat built from src and download the result into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::threshold(cv::gpu::GpuMat(src), gpuRes, thresh, maxVal, threshOp);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // compared with a tolerance argument of 0.0
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Threshold, testing::Combine( // every combination of the three value sets below
+                        testing::ValuesIn(devices()), // GPU devices
+                        testing::Values(CV_8U, CV_32F), // matrix types
+                        testing::Values((int)cv::THRESH_BINARY, (int)cv::THRESH_BINARY_INV, (int)cv::THRESH_TRUNC, (int)cv::THRESH_TOZERO, (int)cv::THRESH_TOZERO_INV))); // threshold operations
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// resize
+
+struct Resize : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+{
+    // Test parameters: GPU device, matrix type and cv::INTER_* mode.
+    cv::gpu::DeviceInfo devInfo;
+    int type, interpolation;
+
+    // Random input image and its dimensions.
+    cv::Size size;
+    cv::Mat src;
+
+    // CPU references: dst_gold1 uses scale factor 2.0, dst_gold2 uses 0.5.
+    cv::Mat dst_gold1, dst_gold2;
+
+    virtual void SetUp()
+    {
+        const std::tr1::tuple<cv::gpu::DeviceInfo, int, int>& params = GetParam();
+        devInfo = std::tr1::get<0>(params);
+        type = std::tr1::get<1>(params);
+        interpolation = std::tr1::get<2>(params);
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
+        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
+
+        const double scaleUp = 2.0, scaleDown = 0.5;
+        cv::resize(src, dst_gold1, cv::Size(), scaleUp, scaleUp, interpolation);
+        cv::resize(src, dst_gold2, cv::Size(), scaleDown, scaleDown, interpolation);
+    }
+};
+
+TEST_P(Resize, Accuracy) // cv::gpu::resize is checked against cv::resize for scale factors 2.0 and 0.5
+{
+    static const char* interpolations[] = {"INTER_NEAREST", "INTER_LINEAR"};
+    const char* interpolationStr = interpolations[interpolation]; // the table has two entries and is indexed by the raw interpolation value
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+    PRINT_PARAM(interpolationStr);
+
+    cv::Mat dst1;
+    cv::Mat dst2;
+    // One GpuMat built from src feeds both GPU resize calls; results are downloaded into dst1/dst2.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_src(src);
+        cv::gpu::GpuMat gpuRes1;
+        cv::gpu::GpuMat gpuRes2;
+
+        cv::gpu::resize(dev_src, gpuRes1, cv::Size(), 2.0, 2.0, interpolation);
+        cv::gpu::resize(dev_src, gpuRes2, cv::Size(), 0.5, 0.5, interpolation);
+
+        gpuRes1.download(dst1);
+        gpuRes2.download(dst2);
+    );
+    // EXPECT_MAT_SIMILAR is defined elsewhere; 0.5 is the threshold passed to it for both scales.
+    EXPECT_MAT_SIMILAR(dst_gold1, dst1, 0.5);
+    EXPECT_MAT_SIMILAR(dst_gold2, dst2, 0.5);
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine( // every combination of the three value sets below
+                        testing::ValuesIn(devices()), // GPU devices
+                        testing::Values(CV_8UC1, CV_8UC4), // matrix types
+                        testing::Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR))); // interpolation modes
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// copyMakeBorder
+
+struct CopyMakeBorder : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+{
+    cv::gpu::DeviceInfo devInfo; // GPU device under test
+    int type;                    // matrix type of the source image
+
+    cv::Size size;
+    cv::Mat src;
+    int top;    // border widths, in pixels, for each side
+    int bottom;
+    int left;
+    int right;
+    cv::Scalar val; // constant colour used to fill the border
+
+    cv::Mat dst_gold; // result of the CPU cv::copyMakeBorder
+    // Draws the random input and border parameters and computes the CPU reference.
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
+
+        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
+        // Each border width is drawn from uniform(1, 10).
+        top = rng.uniform(1, 10);
+        bottom = rng.uniform(1, 10);
+        left = rng.uniform(1, 10);
+        right = rng.uniform(1, 10);
+        val = cv::Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));
+
+        cv::copyMakeBorder(src, dst_gold, top, bottom, left, right, cv::BORDER_CONSTANT, val);
+    }
+};
+
+TEST_P(CopyMakeBorder, Accuracy) // cv::gpu::copyMakeBorder is checked against the CPU constant-border result
+{
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+    PRINT_PARAM(top);
+    PRINT_PARAM(bottom);
+    PRINT_PARAM(left);
+    PRINT_PARAM(right);
+    PRINT_PARAM(val);
+
+    cv::Mat dst;
+    // Run the GPU version on a GpuMat built from src and download the result into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::copyMakeBorder(cv::gpu::GpuMat(src), gpuRes, top, bottom, left, right, val);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // compared with a tolerance argument of 0.0
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, testing::Combine( // every combination of the two value sets below
+                        testing::ValuesIn(devices()), // GPU devices
+                        testing::Values(CV_8UC1, CV_8UC4, CV_32SC1))); // matrix types
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// warpAffine & warpPerspective
+
+static const int warpFlags[] = {cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_NEAREST | cv::WARP_INVERSE_MAP, cv::INTER_LINEAR | cv::WARP_INVERSE_MAP, cv::INTER_CUBIC | cv::WARP_INVERSE_MAP}; // looked up by the flagIdx test parameter of the warp fixtures
+static const char* warpFlags_str[] = {"INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_NEAREST | WARP_INVERSE_MAP", "INTER_LINEAR | WARP_INVERSE_MAP", "INTER_CUBIC | WARP_INVERSE_MAP"}; // printable names, kept in the same order as warpFlags
+
+struct WarpAffine : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+{
+    // Test parameters: GPU device, matrix type, index into warpFlags.
+    cv::gpu::DeviceInfo devInfo;
+    int type, flagIdx;
+
+    // Random source image, its size and the 2x3 transform applied to it.
+    cv::Size size;
+    cv::Mat src, M;
+
+    cv::Mat dst_gold; // output of the CPU cv::warpAffine
+
+    virtual void SetUp()
+    {
+        const std::tr1::tuple<cv::gpu::DeviceInfo, int, int>& params = GetParam();
+        devInfo = std::tr1::get<0>(params);
+        type = std::tr1::get<1>(params);
+        flagIdx = std::tr1::get<2>(params);
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
+        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
+
+        // Coefficients of (x, y) -> (width - x, height - y). They live in function-static
+        // storage and M is constructed directly on top of that buffer.
+        static double coeffs[2][3] = { {-1,  0, 0},
+                                       { 0, -1, 0}};
+        coeffs[0][2] = size.width;
+        coeffs[1][2] = size.height;
+        M = cv::Mat(2, 3, CV_64F, (void*)coeffs);
+
+        cv::warpAffine(src, dst_gold, M, src.size(), warpFlags[flagIdx]);
+    }
+};
+
+TEST_P(WarpAffine, Accuracy) // cv::gpu::warpAffine is checked against cv::warpAffine, excluding the outer pixel ring
+{
+    const char* warpFlagStr = warpFlags_str[flagIdx];
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+    PRINT_PARAM(warpFlagStr);
+
+    cv::Mat dst;
+    // Same transform M and flags as the CPU reference computed in SetUp().
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::warpAffine(cv::gpu::GpuMat(src), gpuRes, M, src.size(), warpFlags[flagIdx]);
+
+        gpuRes.download(dst);
+    );
+
+    // Check inner parts (ignoring 1 pixel width border)
+    cv::Mat dst_gold_roi = dst_gold.rowRange(1, dst_gold.rows - 1).colRange(1, dst_gold.cols - 1);
+    cv::Mat dst_roi = dst.rowRange(1, dst.rows - 1).colRange(1, dst.cols - 1);
+
+    EXPECT_MAT_NEAR(dst_gold_roi, dst_roi, 1e-3);
+}
+
+struct WarpPerspective : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+{
+    // Test parameters: GPU device, matrix type, index into warpFlags.
+    cv::gpu::DeviceInfo devInfo;
+    int type, flagIdx;
+
+    // Random source image, its size and the 3x3 transform applied to it.
+    cv::Size size;
+    cv::Mat src, M;
+
+    cv::Mat dst_gold; // output of the CPU cv::warpPerspective
+
+    virtual void SetUp()
+    {
+        const std::tr1::tuple<cv::gpu::DeviceInfo, int, int>& params = GetParam();
+        devInfo = std::tr1::get<0>(params);
+        type = std::tr1::get<1>(params);
+        flagIdx = std::tr1::get<2>(params);
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
+        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
+
+        // Coefficients of (x, y) -> (width - x, height - y) in homogeneous form. They live in
+        // function-static storage and M is constructed directly on top of that buffer.
+        static double coeffs[3][3] = { { -1, 0, 0},
+                                       { 0, -1, 0},
+                                       { 0,  0, 1}};
+        coeffs[0][2] = size.width;
+        coeffs[1][2] = size.height;
+        M = cv::Mat(3, 3, CV_64F, (void*)coeffs);
+
+        cv::warpPerspective(src, dst_gold, M, src.size(), warpFlags[flagIdx]);
+    }
+};
+
+TEST_P(WarpPerspective, Accuracy) // cv::gpu::warpPerspective is checked against cv::warpPerspective, excluding the outer pixel ring
+{
+    const char* warpFlagStr = warpFlags_str[flagIdx];
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+    PRINT_PARAM(warpFlagStr);
+
+    cv::Mat dst;
+    // Same transform M and flags as the CPU reference computed in SetUp().
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::warpPerspective(cv::gpu::GpuMat(src), gpuRes, M, src.size(), warpFlags[flagIdx]);
+
+        gpuRes.download(dst);
+    );
+
+    // Check inner parts (ignoring 1 pixel width border)
+    cv::Mat dst_gold_roi = dst_gold.rowRange(1, dst_gold.rows - 1).colRange(1, dst_gold.cols - 1);
+    cv::Mat dst_roi = dst.rowRange(1, dst.rows - 1).colRange(1, dst.cols - 1);
+
+    EXPECT_MAT_NEAR(dst_gold_roi, dst_roi, 1e-3);
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, WarpAffine, testing::Combine( // every combination of the three value sets below
+                        testing::ValuesIn(devices()), // GPU devices
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), // matrix types
+                        testing::Range(0, 6))); // flagIdx 0..5, i.e. every entry of warpFlags
+
+INSTANTIATE_TEST_CASE_P(ImgProc, WarpPerspective, testing::Combine( // every combination of the three value sets below
+                        testing::ValuesIn(devices()), // GPU devices
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), // matrix types
+                        testing::Range(0, 6))); // flagIdx 0..5, i.e. every entry of warpFlags
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// integral
+
+struct Integral : testing::TestWithParam<cv::gpu::DeviceInfo>
+{
+    cv::gpu::DeviceInfo devInfo; // device under test (the only parameter)
+
+    // Random 8-bit single-channel input and its dimensions.
+    cv::Size size;
+    cv::Mat src;
+
+    cv::Mat dst_gold; // CPU integral image requested with depth CV_32S
+
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
+
+        const int srcType = CV_8UC1;
+        src = cvtest::randomMat(rng, size, srcType, 0.0, 255.0, false);
+
+        cv::integral(src, dst_gold, CV_32S);
+    }
+};
+
+TEST_P(Integral, Accuracy) // cv::gpu::integral is checked against the CPU cv::integral result
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+
+    cv::Mat dst;
+    // Run the GPU version on a GpuMat built from src and download the result into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::integral(cv::gpu::GpuMat(src), gpuRes);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // compared with a tolerance argument of 0.0
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::ValuesIn(devices())); // one instance per entry returned by devices()
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// cvtColor
+
+struct CvtColor : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
+{
+    // Source picture shared by every test of this case: read once, released once.
+    static cv::Mat imgBase;
+
+    static void SetUpTestCase() { imgBase = readImage("stereobm/aloe-L.png"); }
+
+    static void TearDownTestCase() { imgBase.release(); }
+
+    // Test parameters: GPU device and the depth the picture is converted to.
+    cv::gpu::DeviceInfo devInfo;
+    int type;
+
+    cv::Mat img; // imgBase converted to `type`
+
+    virtual void SetUp()
+    {
+        const std::tr1::tuple<cv::gpu::DeviceInfo, int>& params = GetParam();
+        devInfo = std::tr1::get<0>(params);
+        type = std::tr1::get<1>(params);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        // CV_32F pictures are scaled by 1/255; every other depth keeps the raw values.
+        const double scale = (type == CV_32F) ? 1.0 / 255.0 : 1.0;
+        imgBase.convertTo(img, type, scale);
+    }
+};
+
+// Out-of-class definition of the static fixture member.
+cv::Mat CvtColor::imgBase;
+
+TEST_P(CvtColor, BGR2RGB) // GPU vs CPU check for the CV_BGR2RGB conversion
+{
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // CPU reference: convert the fixture picture with cv::cvtColor.
+    cv::Mat src = img;
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_BGR2RGB);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2RGB);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // compared with a tolerance argument of 0.0
+}
+
+TEST_P(CvtColor, BGR2RGBA) // GPU vs CPU check for the CV_BGR2RGBA conversion
+{
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // CPU reference: convert the fixture picture with cv::cvtColor.
+    cv::Mat src = img;
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_BGR2RGBA);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2RGBA);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // compared with a tolerance argument of 0.0
+}
+
+TEST_P(CvtColor, BGRA2RGB) // GPU vs CPU check for the CV_BGRA2RGB conversion
+{
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // The BGRA input is first derived from the fixture picture on the CPU, then the reference is computed from it.
+    cv::Mat src;
+    cv::cvtColor(img, src, CV_BGR2BGRA);
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_BGRA2RGB);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGRA2RGB);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // compared with a tolerance argument of 0.0
+}
+
+TEST_P(CvtColor, BGR2YCrCb) // GPU vs CPU check for the CV_BGR2YCrCb conversion
+{
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // CPU reference: convert the fixture picture with cv::cvtColor.
+    cv::Mat src = img;
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_BGR2YCrCb);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2YCrCb);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5); // compared with a tolerance argument of 1e-5
+}
+
+TEST_P(CvtColor, YCrCb2RGB) // GPU vs CPU check for the CV_YCrCb2RGB conversion
+{
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // The YCrCb input is first derived from the fixture picture on the CPU, then the reference is computed from it.
+    cv::Mat src;
+    cv::cvtColor(img, src, CV_BGR2YCrCb);
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_YCrCb2RGB);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_YCrCb2RGB);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5); // compared with a tolerance argument of 1e-5
+}
+
+TEST_P(CvtColor, BGR2YUV) // GPU vs CPU check for the CV_BGR2YUV conversion
+{
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // CPU reference: convert the fixture picture with cv::cvtColor.
+    cv::Mat src = img;
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_BGR2YUV);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2YUV);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5); // compared with a tolerance argument of 1e-5
+}
+
+TEST_P(CvtColor, YUV2BGR) // GPU vs CPU check for the CV_YUV2BGR conversion
+{
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // The YUV input is first derived from the fixture picture on the CPU, then the reference is computed from it.
+    cv::Mat src;
+    cv::cvtColor(img, src, CV_BGR2YUV);
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_YUV2BGR);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_YUV2BGR);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5); // compared with a tolerance argument of 1e-5
+}
+
+TEST_P(CvtColor, BGR2XYZ) // GPU vs CPU check for the CV_BGR2XYZ conversion
+{
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // CPU reference: convert the fixture picture with cv::cvtColor.
+    cv::Mat src = img;
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_BGR2XYZ);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2XYZ);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5); // compared with a tolerance argument of 1e-5
+}
+
+TEST_P(CvtColor, XYZ2BGR) // GPU vs CPU check for the CV_XYZ2BGR conversion
+{
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // The XYZ input is first derived from the fixture picture on the CPU, then the reference is computed from it.
+    cv::Mat src;
+    cv::cvtColor(img, src, CV_BGR2XYZ);
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_XYZ2BGR);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_XYZ2BGR);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5); // compared with a tolerance argument of 1e-5
+}
+
+TEST_P(CvtColor, BGR2HSV) // GPU vs CPU check for the CV_BGR2HSV conversion
+{
+    if (type == CV_16U) // CV_16U instances return at once, so they pass without exercising the conversion
+        return;
+
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // CPU reference: convert the fixture picture with cv::cvtColor.
+    cv::Mat src = img;
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_BGR2HSV);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2HSV);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, type == CV_32F ? 1e-2 : 1); // tolerance 1e-2 for CV_32F, 1 for the other depths
+}
+
+TEST_P(CvtColor, HSV2BGR) // GPU vs CPU check for the CV_HSV2BGR conversion
+{
+    if (type == CV_16U) // CV_16U instances return at once, so they pass without exercising the conversion
+        return;
+
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // The HSV input is first derived from the fixture picture on the CPU, then the reference is computed from it.
+    cv::Mat src;
+    cv::cvtColor(img, src, CV_BGR2HSV);
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_HSV2BGR);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HSV2BGR);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, type == CV_32F ? 1e-2 : 1); // tolerance 1e-2 for CV_32F, 1 for the other depths
+}
+
+TEST_P(CvtColor, BGR2HSV_FULL) // GPU vs CPU check for the CV_BGR2HSV_FULL conversion
+{
+    if (type == CV_16U) // CV_16U instances return at once, so they pass without exercising the conversion
+        return;
+
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // CPU reference: convert the fixture picture with cv::cvtColor.
+    cv::Mat src = img;
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_BGR2HSV_FULL);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2HSV_FULL);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, type == CV_32F ? 1e-2 : 1); // tolerance 1e-2 for CV_32F, 1 for the other depths
+}
+
+TEST_P(CvtColor, HSV2BGR_FULL) // GPU vs CPU check for the CV_HSV2BGR_FULL conversion
+{
+    if (type == CV_16U) // CV_16U instances return at once, so they pass without exercising the conversion
+        return;
+
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // The full-range HSV input is first derived from the fixture picture on the CPU, then the reference is computed from it.
+    cv::Mat src;
+    cv::cvtColor(img, src, CV_BGR2HSV_FULL);
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_HSV2BGR_FULL);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HSV2BGR_FULL);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, type == CV_32F ? 1e-2 : 1); // tolerance 1e-2 for CV_32F, 1 for the other depths
+}
+
+TEST_P(CvtColor, BGR2HLS) // GPU vs CPU check for the CV_BGR2HLS conversion
+{
+    if (type == CV_16U) // CV_16U instances return at once, so they pass without exercising the conversion
+        return;
+
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // CPU reference: convert the fixture picture with cv::cvtColor.
+    cv::Mat src = img;
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_BGR2HLS);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2HLS);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, type == CV_32F ? 1e-2 : 1); // tolerance 1e-2 for CV_32F, 1 for the other depths
+}
+
+TEST_P(CvtColor, HLS2BGR) // GPU vs CPU check for the CV_HLS2BGR conversion
+{
+    if (type == CV_16U) // CV_16U instances return at once, so they pass without exercising the conversion
+        return;
+
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // The HLS input is first derived from the fixture picture on the CPU, then the reference is computed from it.
+    cv::Mat src;
+    cv::cvtColor(img, src, CV_BGR2HLS);
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_HLS2BGR);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HLS2BGR);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, type == CV_32F ? 1e-2 : 1); // tolerance 1e-2 for CV_32F, 1 for the other depths
+}
+
+TEST_P(CvtColor, BGR2HLS_FULL) // GPU vs CPU check for the CV_BGR2HLS_FULL conversion
+{
+    if (type == CV_16U) // CV_16U instances return at once, so they pass without exercising the conversion
+        return;
+
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // CPU reference: convert the fixture picture with cv::cvtColor.
+    cv::Mat src = img;
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_BGR2HLS_FULL);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2HLS_FULL);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, type == CV_32F ? 1e-2 : 1); // tolerance 1e-2 for CV_32F, 1 for the other depths
+}
+
+TEST_P(CvtColor, HLS2BGR_FULL) // GPU vs CPU check for the CV_HLS2BGR_FULL conversion
+{
+    if (type == CV_16U) // CV_16U instances return at once, so they pass without exercising the conversion
+        return;
+
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // The full-range HLS input is first derived from the fixture picture on the CPU, then the reference is computed from it.
+    cv::Mat src;
+    cv::cvtColor(img, src, CV_BGR2HLS_FULL);
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_HLS2BGR_FULL);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_HLS2BGR_FULL);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, type == CV_32F ? 1e-2 : 1); // tolerance 1e-2 for CV_32F, 1 for the other depths
+}
+
+TEST_P(CvtColor, BGR2GRAY) // GPU vs CPU check for the CV_BGR2GRAY conversion
+{
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // CPU reference: convert the fixture picture with cv::cvtColor.
+    cv::Mat src = img;
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_BGR2GRAY);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_BGR2GRAY);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-5); // compared with a tolerance argument of 1e-5
+}
+
+TEST_P(CvtColor, GRAY2RGB) // GPU vs CPU check for the CV_GRAY2RGB conversion
+{
+    ASSERT_TRUE(!img.empty()); // stop here when the fixture picture is empty
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    // The grayscale input is first derived from the fixture picture on the CPU, then the reference is computed from it.
+    cv::Mat src;
+    cv::cvtColor(img, src, CV_BGR2GRAY);
+    cv::Mat dst_gold;
+    cv::cvtColor(src, dst_gold, CV_GRAY2RGB);
+
+    cv::Mat dst;
+    // GPU run on a GpuMat built from src; the result is downloaded into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpuRes;
+
+        cv::gpu::cvtColor(cv::gpu::GpuMat(src), gpuRes, CV_GRAY2RGB);
+
+        gpuRes.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // compared with a tolerance argument of 0.0
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine( // every combination of the two value sets below
+                        testing::ValuesIn(devices()), // GPU devices
+                        testing::Values(CV_8U, CV_16U, CV_32F))); // depths; the HSV/HLS tests return early for CV_16U
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// histograms
+
+struct Histograms : testing::TestWithParam<cv::gpu::DeviceInfo>
+{
+    static cv::Mat hsv; // HSV conversion of the test picture, shared by every instance
+
+    static void SetUpTestCase()
+    {
+        cv::Mat img = readImage("stereobm/aloe-L.png");
+        if (!img.empty()) cv::cvtColor(img, hsv, CV_BGR2HSV); // never hand an empty picture to cvtColor; hsv stays empty instead
+    }
+
+    static void TearDownTestCase()
+    {
+        hsv.release();
+    }
+
+    cv::gpu::DeviceInfo devInfo;
+    int hbins;        // number of histogram bins
+    float hranges[2]; // lower and upper bound passed to calcHist / histEven
+
+    cv::Mat hist_gold; // CPU histogram, transposed and converted to CV_32S
+
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        // Fail in SetUp when the picture is missing so calcHist never sees an empty matrix.
+        ASSERT_FALSE(hsv.empty());
+
+        hbins = 30;
+        hranges[0] = 0;
+        hranges[1] = 180;
+
+        int histSize[] = {hbins};
+        const float* ranges[] = {hranges};
+        int channels[] = {0}; // only channel 0 of hsv is histogrammed
+
+        cv::MatND histnd;
+        cv::calcHist(&hsv, 1, channels, cv::Mat(), histnd, 1, histSize, ranges);
+
+        // Bring the reference into the layout and type compared against the GPU output.
+        hist_gold = histnd;
+        hist_gold = hist_gold.t();
+        hist_gold.convertTo(hist_gold, CV_32S);
+    }
+};
+
+cv::Mat Histograms::hsv;
+
+TEST_P(Histograms, Accuracy) // cv::gpu::histEven on channel 0 of hsv is checked against the cv::calcHist reference
+{
+    ASSERT_TRUE(!hsv.empty()); // stop here when the shared HSV picture is empty
+
+    PRINT_PARAM(devInfo);
+
+    cv::Mat hist;
+    // Split the picture on the GPU, histogram the first plane, and download the result into hist.
+    ASSERT_NO_THROW(
+        std::vector<cv::gpu::GpuMat> srcs;
+        cv::gpu::split(cv::gpu::GpuMat(hsv), srcs);
+
+        cv::gpu::GpuMat gpuHist;
+
+        cv::gpu::histEven(srcs[0], gpuHist, hbins, (int)hranges[0], (int)hranges[1]);
+
+        gpuHist.download(hist);
+    );
+
+    EXPECT_MAT_NEAR(hist_gold, hist, 0.0); // compared with a tolerance argument of 0.0
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Histograms, testing::ValuesIn(devices())); // one instance per entry returned by devices()
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// cornerHarris
+
+static const int borderTypes[] = {cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT101, cv::BORDER_TRANSPARENT}; // looked up by the borderTypeIdx test parameter of the corner fixtures
+static const char* borderTypes_str[] = {"BORDER_REPLICATE", "BORDER_CONSTANT", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT101", "BORDER_TRANSPARENT"}; // printable names, kept in the same order as borderTypes
+
+struct CornerHarris : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+{
+    static cv::Mat img; // grayscale test picture shared by every instance
+
+    static void SetUpTestCase()
+    {
+        img = readImage("stereobm/aloe-L.png", CV_LOAD_IMAGE_GRAYSCALE);
+    }
+
+    static void TearDownTestCase()
+    {
+        img.release();
+    }
+
+    cv::gpu::DeviceInfo devInfo;
+    int type;
+    int borderTypeIdx; // index into borderTypes
+
+    cv::Mat src;
+    int blockSize;
+    int apertureSize;
+    double k;
+
+    cv::Mat dst_gold; // response computed by the CPU cv::cornerHarris
+
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+        borderTypeIdx = std::tr1::get<2>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+        // Fail in SetUp when the picture is missing so cornerHarris never runs on an empty matrix.
+        ASSERT_FALSE(img.empty());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        img.convertTo(src, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
+        blockSize = 1 + rng.next() % 5;          // 1..5
+        apertureSize = 1 + 2 * (rng.next() % 4); // 1, 3, 5 or 7
+        k = rng.uniform(0.1, 0.9);
+
+        cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderTypes[borderTypeIdx]);
+    }
+};
+
+cv::Mat CornerHarris::img;
+
+TEST_P(CornerHarris, Accuracy) // cv::gpu::cornerHarris is checked against the CPU cv::cornerHarris result
+{
+    const char* borderTypeStr = borderTypes_str[borderTypeIdx];
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(borderTypeStr);
+    PRINT_PARAM(blockSize);
+    PRINT_PARAM(apertureSize);
+    PRINT_PARAM(k);
+
+    cv::Mat dst;
+    // Same blockSize, apertureSize, k and border type as the CPU reference computed in SetUp().
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst;
+        cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dev_dst, blockSize, apertureSize, k, borderTypes[borderTypeIdx]);
+        dev_dst.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-3); // compared with a tolerance argument of 1e-3
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, testing::Combine( // every combination of the three value sets below
+                        testing::ValuesIn(devices()), // GPU devices
+                        testing::Values(CV_8UC1, CV_32FC1), // matrix types
+                        testing::Values(0, 4))); // borderTypes indices 0 and 4 only: BORDER_REPLICATE and BORDER_REFLECT101
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// cornerMinEigen
+
+struct CornerMinEigen : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+{
+    static cv::Mat img; // grayscale test picture shared by every instance
+
+    static void SetUpTestCase()
+    {
+        img = readImage("stereobm/aloe-L.png", CV_LOAD_IMAGE_GRAYSCALE);
+    }
+
+    static void TearDownTestCase()
+    {
+        img.release();
+    }
+
+    cv::gpu::DeviceInfo devInfo;
+    int type;
+    int borderTypeIdx; // index into borderTypes
+
+    cv::Mat src;
+    int blockSize;
+    int apertureSize;
+
+    cv::Mat dst_gold; // response computed by the CPU cv::cornerMinEigenVal
+
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+        borderTypeIdx = std::tr1::get<2>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID());
+        // Fail in SetUp when the picture is missing so cornerMinEigenVal never runs on an empty matrix.
+        ASSERT_FALSE(img.empty());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        img.convertTo(src, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
+        blockSize = 1 + rng.next() % 5;          // 1..5
+        apertureSize = 1 + 2 * (rng.next() % 4); // 1, 3, 5 or 7
+
+        cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderTypes[borderTypeIdx]);
+    }
+};
+
+cv::Mat CornerMinEigen::img;
+
+TEST_P(CornerMinEigen, Accuracy) // cv::gpu::cornerMinEigenVal is checked against the CPU cv::cornerMinEigenVal result
+{
+    const char* borderTypeStr = borderTypes_str[borderTypeIdx];
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(borderTypeStr);
+    PRINT_PARAM(blockSize);
+    PRINT_PARAM(apertureSize);
+
+    cv::Mat dst;
+    // Same blockSize, apertureSize and border type as the CPU reference computed in SetUp().
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst;
+        cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dev_dst, blockSize, apertureSize, borderTypes[borderTypeIdx]);
+        dev_dst.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 1e-2); // compared with a tolerance argument of 1e-2
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigen, testing::Combine( // every combination of the three value sets below
+                        testing::ValuesIn(devices()), // GPU devices
+                        testing::Values(CV_8UC1, CV_32FC1), // matrix types
+                        testing::Values(0, 4))); // borderTypes indices 0 and 4 only: BORDER_REPLICATE and BORDER_REFLECT101
+
+////////////////////////////////////////////////////////////////////////
+// ColumnSum
+
+struct ColumnSum : testing::TestWithParam<cv::gpu::DeviceInfo>
+{
+    cv::gpu::DeviceInfo devInfo; // device under test (the only parameter)
+
+    // Random CV_32F input and its dimensions.
+    cv::Size size;
+    cv::Mat src;
+
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        size = cv::Size(rng.uniform(100, 400), rng.uniform(100, 400));
+
+        const double lo = 0.0, hi = 1.0; // value range requested from randomMat
+        src = cvtest::randomMat(rng, size, CV_32F, lo, hi, false);
+    }
+};
+
+TEST_P(ColumnSum, Accuracy) // cv::gpu::columnSum is checked against a running per-column sum computed on the host
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+
+    cv::Mat dst;
+    // Run the GPU version on a GpuMat built from src and download the result into dst.
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst;
+        cv::gpu::columnSum(cv::gpu::GpuMat(src), dev_dst);
+        dev_dst.download(dst);
+    );
+    ASSERT_TRUE(dst.size() == src.size() && dst.type() == CV_32FC1); // the at<float>() reads below use src's bounds
+    for (int j = 0; j < src.cols; ++j) // first row: the expected sum is the input value itself
+    {
+        float gold = src.at<float>(0, j);
+        float res = dst.at<float>(0, j);
+        ASSERT_NEAR(res, gold, 0.5);
+    }
+    // Remaining rows: src is overwritten in place with the running sum, so row i - 1 already holds the total so far.
+    for (int i = 1; i < src.rows; ++i)
+    {
+        for (int j = 0; j < src.cols; ++j)
+        {
+            float gold = src.at<float>(i, j) += src.at<float>(i - 1, j);
+            float res = dst.at<float>(i, j);
+            ASSERT_NEAR(res, gold, 0.5);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, ColumnSum, testing::ValuesIn(devices())); // one instance per entry returned by devices()
+
+////////////////////////////////////////////////////////////////////////
+// Norm
+
+static const int normTypes[] = {cv::NORM_INF, cv::NORM_L1, cv::NORM_L2}; // looked up by the normTypeIdx test parameter of the Norm fixture
+static const char* normTypes_str[] = {"NORM_INF", "NORM_L1", "NORM_L2"}; // printable names, kept in the same order as normTypes
+
+struct Norm : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >
+{
+    // Test parameters: GPU device, matrix type, index into normTypes.
+    cv::gpu::DeviceInfo devInfo;
+    int type, normTypeIdx;
+
+    // Random input matrix and its dimensions.
+    cv::Size size;
+    cv::Mat src;
+
+    double gold; // norm computed by the CPU cv::norm
+
+    virtual void SetUp()
+    {
+        const std::tr1::tuple<cv::gpu::DeviceInfo, int, int>& params = GetParam();
+        devInfo = std::tr1::get<0>(params);
+        type = std::tr1::get<1>(params);
+        normTypeIdx = std::tr1::get<2>(params);
+
+        cv::gpu::setDevice(devInfo.deviceID());
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+        size = cv::Size(rng.uniform(100, 400), rng.uniform(100, 400));
+        src = cvtest::randomMat(rng, size, type, 0.0, 10.0, false);
+
+        gold = cv::norm(src, normTypes[normTypeIdx]);
+    }
+};
+
+TEST_P(Norm, Accuracy) // cv::gpu::norm must agree with the cv::norm reference to within 0.5
+{
+    const char* normTypeStr = normTypes_str[normTypeIdx]; // readable name, only used for the parameter dump below
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+    PRINT_PARAM(normTypeStr);
+
+    double res;
+
+    ASSERT_NO_THROW(
+        res = cv::gpu::norm(cv::gpu::GpuMat(src), normTypes[normTypeIdx]);
+    );
+
+    ASSERT_NEAR(res, gold, 0.5);
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Norm, testing::Combine(
+                        testing::ValuesIn(devices()), 
+                        testing::ValuesIn(types(CV_8U, CV_32F, 1, 1)), // types() is defined outside this chunk; presumably depths CV_8U..CV_32F with one channel - TODO confirm
+                        testing::Range(0, 3))); // yields 0, 1, 2: one index per normTypes entry
+
+////////////////////////////////////////////////////////////////////////////////
+// reprojectImageTo3D
+
+struct ReprojectImageTo3D : testing::TestWithParam<cv::gpu::DeviceInfo> // fixture: random disparity map and Q matrix plus the CPU reference result
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    cv::Size size;
+    cv::Mat disp; // CV_8UC1 input produced by cvtest::randomMat
+    cv::Mat Q;    // 4x4 CV_32FC1 matrix handed to both the CPU and the GPU call
+
+    cv::Mat dst_gold; // output of cv::reprojectImageTo3D
+
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the parameter device before any GPU call is made
+    
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(100, 500), rng.uniform(100, 500));
+
+        disp = cvtest::randomMat(rng, size, CV_8UC1, 5.0, 30.0, false);
+
+        Q = cvtest::randomMat(rng, cv::Size(4, 4), CV_32FC1, 0.1, 1.0, false);
+
+        cv::reprojectImageTo3D(disp, dst_gold, Q, false); // reference computed once per test on the CPU
+    }
+};
+
+TEST_P(ReprojectImageTo3D, Accuracy) // GPU reprojection must match the CPU reference per component within 1e-5
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpures;
+        cv::gpu::reprojectImageTo3D(cv::gpu::GpuMat(disp), gpures, Q);
+        gpures.download(dst);
+    );
+
+    ASSERT_EQ(dst_gold.size(), dst.size());
+
+    for (int y = 0; y < dst_gold.rows; ++y)
+    {
+        const cv::Vec3f* cpu_row = dst_gold.ptr<cv::Vec3f>(y); // reference rows are read as 3-float elements
+        const cv::Vec4f* gpu_row = dst.ptr<cv::Vec4f>(y);      // GPU rows are read as 4-float elements
+
+        for (int x = 0; x < dst_gold.cols; ++x)
+        {
+            cv::Vec3f gold = cpu_row[x];
+            cv::Vec4f res = gpu_row[x];
+
+            ASSERT_NEAR(res[0], gold[0], 1e-5); // only components 0..2 are compared; res[3] is never inspected
+            ASSERT_NEAR(res[1], gold[1], 1e-5);
+            ASSERT_NEAR(res[2], gold[2], 1e-5);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, ReprojectImageTo3D, testing::ValuesIn(devices())); // one instance per entry returned by devices() (helper defined outside this chunk)
+
+////////////////////////////////////////////////////////////////////////////////
+// Downsample
+
+struct Downsample : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // tuple = (device, downsampling step k)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int k;
+
+    cv::Size size; // source size; the source image itself is created inside each test
+
+    cv::Size dst_gold_size; // size the downsampled result is expected to have
+
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        k = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the parameter device before any GPU call is made
+    
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(200 + cvtest::randInt(rng) % 1000, 200 + cvtest::randInt(rng) % 1000);
+
+        dst_gold_size = cv::Size((size.width + k - 1) / k, (size.height + k - 1) / k); // integer ceil of width / k and height / k
+    }
+};
+
+TEST_P(Downsample, Accuracy8U) // CV_8U case: every output pixel must equal the source pixel at (y * k, x * k)
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+    PRINT_PARAM(k);
+
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+    cv::Mat src = cvtest::randomMat(rng, size, CV_8U, 0, 255, false);
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpures;
+        cv::gpu::downsample(cv::gpu::GpuMat(src), gpures, k);
+        gpures.download(dst);
+    );
+
+    ASSERT_EQ(dst_gold_size, dst.size()); // also keeps the y * k / x * k reads below inside src
+
+    for (int y = 0; y < dst.rows; ++y)
+    {
+        for (int x = 0; x < dst.cols; ++x)
+        {
+            int gold = src.at<uchar>(y * k, x * k); // expected value: the source sample taken every k-th row and column
+            int res = dst.at<uchar>(y, x);
+            ASSERT_EQ(gold, res);
+        }
+    }
+}
+
+TEST_P(Downsample, Accuracy32F) // CV_32F case: every output pixel must equal the source pixel at (y * k, x * k)
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(size);
+    PRINT_PARAM(k);
+
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+    cv::Mat src = cvtest::randomMat(rng, size, CV_32F, 0, 1.0, false);
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat gpures;
+        cv::gpu::downsample(cv::gpu::GpuMat(src), gpures, k);
+        gpures.download(dst);
+    );
+
+    ASSERT_EQ(dst_gold_size, dst.size()); // also keeps the y * k / x * k reads below inside src
+
+    for (int y = 0; y < dst.rows; ++y)
+    {
+        for (int x = 0; x < dst.cols; ++x)
+        {
+            float gold = src.at<float>(y * k, x * k); // expected value: the source sample taken every k-th row and column
+            float res = dst.at<float>(y, x);
+            ASSERT_FLOAT_EQ(gold, res);
+        }
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Downsample, testing::Combine(
+                        testing::ValuesIn(devices()), 
+                        testing::Range(2, 6))); // k takes the values 2, 3, 4 and 5 (the end of Range is exclusive)
+
+////////////////////////////////////////////////////////////////////////////////
+// meanShift
+
+struct MeanShift : testing::TestWithParam<cv::gpu::DeviceInfo> // fixture shared by the meanShiftFiltering / meanShiftProc tests
+{
+    static cv::Mat rgba; // 4-channel copy of meanshift/cones.png shared by all tests; left untouched when the image cannot be read
+
+    static void SetUpTestCase() 
+    {
+        cv::Mat img = readImage("meanshift/cones.png");
+        if (!img.empty()) cv::cvtColor(img, rgba, CV_BGR2BGRA); // never convert an empty image: missing data is reported by the tests' own !rgba.empty() assertion
+    }
+
+    static void TearDownTestCase() 
+    {
+        rgba.release(); // drop the shared image once the test case is finished
+    }
+
+    cv::gpu::DeviceInfo devInfo;
+
+    int spatialRad; // spatial radius passed to the mean-shift calls
+    int colorRad;   // color radius passed to the mean-shift calls
+
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the parameter device before any GPU call is made
+
+        spatialRad = 30;
+        colorRad = 30;
+    }
+};
+
+cv::Mat MeanShift::rgba;
+
+TEST_P(MeanShift, Filtering) // meanShiftFiltering output must equal a stored reference image exactly
+{
+    cv::Mat img_template;
+    
+    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20)) // a separate reference image is used for devices without FEATURE_SET_COMPUTE_20
+        img_template = readImage("meanshift/con_result.png");
+    else
+        img_template = readImage("meanshift/con_result_CC1X.png");
+
+    ASSERT_TRUE(!rgba.empty() && !img_template.empty()); // stop early when either the input or the reference is unavailable
+
+    PRINT_PARAM(devInfo);
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst;
+        cv::gpu::meanShiftFiltering(cv::gpu::GpuMat(rgba), dev_dst, spatialRad, colorRad);
+        dev_dst.download(dst);
+    );
+
+    ASSERT_EQ(CV_8UC4, dst.type());
+
+    cv::Mat result;
+    cv::cvtColor(dst, result, CV_BGRA2BGR); // convert the 4-channel output with CV_BGRA2BGR before comparing it with the reference
+
+    EXPECT_MAT_NEAR(img_template, result, 0.0);
+}
+
+TEST_P(MeanShift, Proc) // meanShiftProc must reproduce meanShiftFiltering's image and a stored "spmap" matrix
+{
+    cv::Mat spmap_template;
+    cv::FileStorage fs;
+
+    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20)) // a separate reference file is used for devices without FEATURE_SET_COMPUTE_20
+        fs.open(std::string(cvtest::TS::ptr()->get_data_path()) + "meanshift/spmap.yaml", cv::FileStorage::READ);
+    else
+        fs.open(std::string(cvtest::TS::ptr()->get_data_path()) + "meanshift/spmap_CC1X.yaml", cv::FileStorage::READ);
+
+    ASSERT_TRUE(fs.isOpened());
+
+    fs["spmap"] >> spmap_template;
+
+    ASSERT_TRUE(!rgba.empty() && !spmap_template.empty()); // stop early when either the input or the reference is unavailable
+
+    PRINT_PARAM(devInfo);
+
+    cv::Mat rmap_filtered;
+    cv::Mat rmap;
+    cv::Mat spmap;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat d_rmap_filtered;
+        cv::gpu::meanShiftFiltering(cv::gpu::GpuMat(rgba), d_rmap_filtered, spatialRad, colorRad);
+
+        cv::gpu::GpuMat d_rmap;
+        cv::gpu::GpuMat d_spmap;
+        cv::gpu::meanShiftProc(cv::gpu::GpuMat(rgba), d_rmap, d_spmap, spatialRad, colorRad);
+
+        d_rmap_filtered.download(rmap_filtered);
+        d_rmap.download(rmap);
+        d_spmap.download(spmap);
+    );
+
+    ASSERT_EQ(CV_8UC4, rmap.type());
+    
+    EXPECT_MAT_NEAR(rmap_filtered, rmap, 0.0);    // the first output of meanShiftProc must be identical to the meanShiftFiltering result
+    EXPECT_MAT_NEAR(spmap_template, spmap, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MeanShift, testing::ValuesIn(devices(cv::gpu::FEATURE_SET_COMPUTE_12))); // devices() is defined outside this chunk; presumably it filters by the given feature set - TODO confirm
+
+struct MeanShiftSegmentation : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // tuple = (device, minsize argument)
+{
+    static cv::Mat rgba; // 4-channel copy of meanshift/cones.png shared by all tests; left untouched when the image cannot be read
+
+    static void SetUpTestCase() 
+    {
+        cv::Mat img = readImage("meanshift/cones.png");
+        if (!img.empty()) cv::cvtColor(img, rgba, CV_BGR2BGRA); // never convert an empty image: missing data is reported by the test's own !rgba.empty() assertion
+    }
+
+    static void TearDownTestCase() 
+    {
+        rgba.release(); // drop the shared image once the test case is finished
+    }
+
+    cv::gpu::DeviceInfo devInfo;
+    int minsize;
+
+    cv::Mat dst_gold; // stored reference segmentation for this minsize / device class
+
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        minsize = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the parameter device before any GPU call is made
+
+        std::ostringstream path; // reference file name depends on minsize and on FEATURE_SET_COMPUTE_20 support
+        path << "meanshift/cones_segmented_sp10_sr10_minsize" << minsize;
+        if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
+            path << ".png";
+        else
+            path << "_CC1X.png";
+
+        dst_gold = readImage(path.str());
+    }
+};
+
+cv::Mat MeanShiftSegmentation::rgba;
+
+TEST_P(MeanShiftSegmentation, Regression) // meanShiftSegmentation output must stay similar to the stored reference image
+{
+    ASSERT_TRUE(!rgba.empty() && !dst_gold.empty()); // stop early when either the input or the reference is unavailable
+
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(minsize);
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::meanShiftSegmentation(cv::gpu::GpuMat(rgba), dst, 10, 10, minsize); // 10, 10 correspond to the sp10 / sr10 part of the reference file name
+    );
+
+    cv::Mat dst_rgb;
+    cv::cvtColor(dst, dst_rgb, CV_BGRA2BGR); // convert the output with CV_BGRA2BGR before comparing it with the reference
+
+    EXPECT_MAT_SIMILAR(dst_gold, dst_rgb, 1e-5);
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftSegmentation, testing::Combine(
+                        testing::ValuesIn(devices(cv::gpu::FEATURE_SET_COMPUTE_12)), 
+                        testing::Values(0, 4, 20, 84, 340, 1364))); // minsize values; the fixture loads one reference image per value
+
+////////////////////////////////////////////////////////////////////////////////
+// matchTemplate
+
+static const char* matchTemplateMethods[] = {"SQDIFF", "SQDIFF_NORMED", "CCORR", "CCORR_NORMED", "CCOEFF", "CCOEFF_NORMED"}; // printable names; the tests index this array directly with the integer method value
+
+struct MatchTemplate8U : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> > // tuple = (device, channel count, matchTemplate method)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int cn;
+    int method;
+
+    int n, m, h, w; // image is m x n (width x height), template is w x h
+    cv::Mat image, templ;
+
+    cv::Mat dst_gold; // output of cv::matchTemplate
+
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        cn = std::tr1::get<1>(GetParam());
+        method = std::tr1::get<2>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the parameter device before any GPU call is made
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        n = rng.uniform(30, 100);
+        m = rng.uniform(30, 100);
+        h = rng.uniform(5, n - 1); // upper bounds derive from the image size, so the template is drawn smaller than the image
+        w = rng.uniform(5, m - 1);
+
+        image = cvtest::randomMat(rng, cv::Size(m, n), CV_MAKETYPE(CV_8U, cn), 1.0, 10.0, false);
+        templ = cvtest::randomMat(rng, cv::Size(w, h), CV_MAKETYPE(CV_8U, cn), 1.0, 10.0, false);
+
+        cv::matchTemplate(image, templ, dst_gold, method); // reference computed once per test on the CPU
+    }
+};
+
+TEST_P(MatchTemplate8U, Regression) // GPU matchTemplate on 8-bit data must stay near the CPU reference
+{
+    const char* matchTemplateMethodStr = matchTemplateMethods[method]; // readable name, only used for the parameter dump below
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(cn);
+    PRINT_PARAM(matchTemplateMethodStr);
+    PRINT_PARAM(n);
+    PRINT_PARAM(m);
+    PRINT_PARAM(h);
+    PRINT_PARAM(w);
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst;
+        cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(templ), dev_dst, method);
+        dev_dst.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 5 * h * w * 1e-4); // tolerance grows with the template area h * w
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate8U, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Range(1, 5), // channel counts 1, 2, 3 and 4 (the end of Range is exclusive)
+                        testing::Values((int)CV_TM_SQDIFF, (int)CV_TM_SQDIFF_NORMED, (int)CV_TM_CCORR, (int)CV_TM_CCORR_NORMED, (int)CV_TM_CCOEFF, (int)CV_TM_CCOEFF_NORMED)));
+
+struct MatchTemplate32F : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> > // tuple = (device, channel count, matchTemplate method)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int cn;
+    int method;
+
+    int n, m, h, w; // image is m x n (width x height), template is w x h
+    cv::Mat image, templ;
+
+    cv::Mat dst_gold; // output of cv::matchTemplate
+
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        cn = std::tr1::get<1>(GetParam());
+        method = std::tr1::get<2>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the parameter device before any GPU call is made
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        n = rng.uniform(30, 100);
+        m = rng.uniform(30, 100);
+        h = rng.uniform(5, n - 1); // upper bounds derive from the image size, so the template is drawn smaller than the image
+        w = rng.uniform(5, m - 1);
+
+        image = cvtest::randomMat(rng, cv::Size(m, n), CV_MAKETYPE(CV_32F, cn), 0.001, 1.0, false);
+        templ = cvtest::randomMat(rng, cv::Size(w, h), CV_MAKETYPE(CV_32F, cn), 0.001, 1.0, false);
+
+        cv::matchTemplate(image, templ, dst_gold, method); // reference computed once per test on the CPU
+    }
+};
+
+TEST_P(MatchTemplate32F, Regression) // GPU matchTemplate on float data must stay near the CPU reference
+{
+    const char* matchTemplateMethodStr = matchTemplateMethods[method]; // readable name, only used for the parameter dump below
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(cn);
+    PRINT_PARAM(matchTemplateMethodStr);
+    PRINT_PARAM(n);
+    PRINT_PARAM(m);
+    PRINT_PARAM(h);
+    PRINT_PARAM(w);
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst;
+        cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(templ), dev_dst, method);
+        dev_dst.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.25 * h * w * 1e-4); // tolerance grows with the template area h * w
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate32F, testing::Combine(
+                        testing::ValuesIn(devices()), 
+                        testing::Range(1, 5), // channel counts 1, 2, 3 and 4 (the end of Range is exclusive)
+                        testing::Values((int)CV_TM_SQDIFF, (int)CV_TM_CCORR))); // only these two methods are instantiated for float input
+
+struct MatchTemplate : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // tuple = (device, matchTemplate method)
+{
+    static cv::Mat image;   // loaded from matchtemplate/black.png
+    static cv::Mat pattern; // loaded from matchtemplate/cat.png
+
+    static cv::Point maxLocGold; // location at which the response maximum is expected
+
+    static void SetUpTestCase() 
+    {
+        image = readImage("matchtemplate/black.png");
+        pattern = readImage("matchtemplate/cat.png");
+
+        maxLocGold = cv::Point(284, 12);
+    }
+
+    static void TearDownTestCase() 
+    {
+        image.release(); // drop the shared images once the test case is finished
+        pattern.release();
+    }
+
+    cv::gpu::DeviceInfo devInfo;
+    int method;
+
+    virtual void SetUp()
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        method = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the parameter device before any GPU call is made
+    }
+};
+
+cv::Mat MatchTemplate::image;
+cv::Mat MatchTemplate::pattern;
+cv::Point MatchTemplate::maxLocGold;
+
+TEST_P(MatchTemplate, FindPatternInBlack) // the maximum of the GPU response map must sit at the known location
+{
+    ASSERT_TRUE(!image.empty() && !pattern.empty()); // stop early when the test images are unavailable
+
+    const char* matchTemplateMethodStr = matchTemplateMethods[method]; // readable name, only used for the parameter dump below
+
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(matchTemplateMethodStr);
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_dst;
+        cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(pattern), dev_dst, method);
+        dev_dst.download(dst);
+    );
+
+    double maxValue; // filled by minMaxLoc but not checked; only the location is asserted
+    cv::Point maxLoc;
+    cv::minMaxLoc(dst, NULL, &maxValue, NULL, &maxLoc);
+
+    ASSERT_EQ(maxLocGold, maxLoc);
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate, testing::Combine(
+                        testing::ValuesIn(devices()), 
+                        testing::Values((int)CV_TM_CCOEFF_NORMED, (int)CV_TM_CCORR_NORMED))); // only the two normalized methods listed here are instantiated
+
+////////////////////////////////////////////////////////////////////////////
+// MulSpectrums
+
+struct MulSpectrums : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // tuple = (device, flag forwarded to mulSpectrums)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int flag;
+
+    cv::Mat a, b; // two CV_32FC2 inputs of identical size
+
+    virtual void SetUp() 
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        flag = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the parameter device before any GPU call is made
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        a = cvtest::randomMat(rng, cv::Size(rng.uniform(100, 200), rng.uniform(100, 200)), CV_32FC2, 0.0, 10.0, false);
+        b = cvtest::randomMat(rng, a.size(), CV_32FC2, 0.0, 10.0, false); // b reuses a's size
+    }
+};
+
+TEST_P(MulSpectrums, Simple) // cv::gpu::mulSpectrums must match cv::mulSpectrums within 1e-4
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(flag);
+
+    cv::Mat c_gold;
+    cv::mulSpectrums(a, b, c_gold, flag, false); // CPU reference
+    
+    cv::Mat c;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat d_c;
+
+        cv::gpu::mulSpectrums(cv::gpu::GpuMat(a), cv::gpu::GpuMat(b), d_c, flag, false);
+
+        d_c.download(c);
+    );
+
+    EXPECT_MAT_NEAR(c_gold, c, 1e-4);
+}
+
+TEST_P(MulSpectrums, Scaled) // cv::gpu::mulAndScaleSpectrums must match cv::mulSpectrums followed by a scale
+{
+    PRINT_PARAM(devInfo);
+    PRINT_PARAM(flag);
+
+    float scale = 1.f / a.size().area(); // scale factor is the reciprocal of the element count of a
+
+    cv::Mat c_gold;
+    cv::mulSpectrums(a, b, c_gold, flag, false);
+    c_gold.convertTo(c_gold, c_gold.type(), scale); // same type, values multiplied by scale: the CPU reference for the scaled product
+
+    cv::Mat c;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat d_c;
+
+        cv::gpu::mulAndScaleSpectrums(cv::gpu::GpuMat(a), cv::gpu::GpuMat(b), d_c, flag, scale, false);
+
+        d_c.download(c);
+    );
+
+    EXPECT_MAT_NEAR(c_gold, c, 1e-4);
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MulSpectrums, testing::Combine(
+                        testing::ValuesIn(devices()), 
+                        testing::Values(0, (int)cv::DFT_ROWS))); // flag values: no flag, and cv::DFT_ROWS
+
+////////////////////////////////////////////////////////////////////////////
+// Dft
+
+struct Dft : testing::TestWithParam<cv::gpu::DeviceInfo> // fixture for the dft tests; its only state is the device
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    virtual void SetUp() 
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the parameter device before any GPU call is made
+    }
+};
+
+static void testC2C(const std::string& hint, int cols, int rows, int flags, bool inplace) // runs one complex-to-complex cv::gpu::dft case against cv::dft
+{
+    PRINT_PARAM(hint);
+    PRINT_PARAM(cols);
+    PRINT_PARAM(rows);
+    PRINT_PARAM(flags);
+    PRINT_PARAM(inplace);
+
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+    cv::Mat a = cvtest::randomMat(rng, cv::Size(cols, rows), CV_32FC2, 0.0, 10.0, false);
+
+    cv::Mat b_gold;
+    cv::dft(a, b_gold, flags); // CPU reference
+
+    cv::gpu::GpuMat d_b;
+    cv::gpu::GpuMat d_b_data;
+    if (inplace)
+    {
+        d_b_data.create(1, a.size().area(), CV_32FC2); // single-row buffer holding rows * cols complex elements
+        d_b = cv::gpu::GpuMat(a.rows, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize()); // view that buffer as rows x cols with a row step of exactly cols elements
+    }
+    cv::gpu::dft(cv::gpu::GpuMat(a), d_b, cv::Size(cols, rows), flags);
+
+    EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr()); // with a preallocated output, dft must have written into it rather than reallocating
+    ASSERT_EQ(CV_32F, d_b.depth());
+    ASSERT_EQ(2, d_b.channels());
+    EXPECT_MAT_NEAR(b_gold, d_b, rows * cols * 1e-4); // tolerance grows with the number of elements
+}
+
+TEST_P(Dft, C2C) // complex-to-complex dft over several sizes and flags, first out-of-place, then into a preallocated buffer
+{
+    PRINT_PARAM(devInfo);
+
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+    int cols = 2 + rng.next() % 100, rows = 2 + rng.next() % 100; // each dimension falls in [2, 101]
+
+    ASSERT_NO_THROW(
+        for (int i = 0; i < 2; ++i)
+        {
+            bool inplace = i != 0; // pass 0: output allocated by dft; pass 1: output preallocated by testC2C
+            // The "a b" suffix of a hint means the case runs with cols + a and rows + b.
+            testC2C("no flags", cols, rows, 0, inplace);
+            testC2C("no flags 0 1", cols, rows + 1, 0, inplace);
+            testC2C("no flags 1 0", cols + 1, rows, 0, inplace);
+            testC2C("no flags 1 1", cols + 1, rows + 1, 0, inplace);
+            testC2C("DFT_INVERSE", cols, rows, cv::DFT_INVERSE, inplace);
+            testC2C("DFT_ROWS", cols, rows, cv::DFT_ROWS, inplace);
+            testC2C("single col", 1, rows, 0, inplace);
+            testC2C("single row", cols, 1, 0, inplace);
+            testC2C("single col inversed", 1, rows, cv::DFT_INVERSE, inplace);
+            testC2C("single row inversed", cols, 1, cv::DFT_INVERSE, inplace);
+            testC2C("single row DFT_ROWS", cols, 1, cv::DFT_ROWS, inplace);
+            testC2C("size 1 2", 1, 2, 0, inplace);
+            testC2C("size 2 1", 2, 1, 0, inplace);
+        }
+    );
+}
+
+static void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace) // forward real-input dft followed by the scaled real-output dft must give back the input
+{
+    PRINT_PARAM(hint);
+    PRINT_PARAM(cols);
+    PRINT_PARAM(rows);
+    PRINT_PARAM(inplace);
+    
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+    cv::Mat a = cvtest::randomMat(rng, cv::Size(cols, rows), CV_32FC1, 0.0, 10.0, false);
+
+    cv::gpu::GpuMat d_b, d_c;
+    cv::gpu::GpuMat d_b_data, d_c_data;
+    if (inplace)
+    {
+        if (a.cols == 1) // single column: the complex intermediate is preallocated as (rows / 2 + 1) x 1
+        {
+            d_b_data.create(1, (a.rows / 2 + 1) * a.cols, CV_32FC2);
+            d_b = cv::gpu::GpuMat(a.rows / 2 + 1, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
+        }
+        else // otherwise the complex intermediate is preallocated as rows x (cols / 2 + 1)
+        {
+            d_b_data.create(1, a.rows * (a.cols / 2 + 1), CV_32FC2);
+            d_b = cv::gpu::GpuMat(a.rows, a.cols / 2 + 1, CV_32FC2, d_b_data.ptr(), (a.cols / 2 + 1) * d_b_data.elemSize());
+        }
+        d_c_data.create(1, a.size().area(), CV_32F); // final real output is preallocated with the input's dimensions
+        d_c = cv::gpu::GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
+    }
+
+    cv::gpu::dft(cv::gpu::GpuMat(a), d_b, cv::Size(cols, rows), 0);
+    cv::gpu::dft(d_b, d_c, cv::Size(cols, rows), cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
+    
+    EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr()); // preallocated outputs must have been used rather than reallocated
+    EXPECT_TRUE(!inplace || d_c.ptr() == d_c_data.ptr());
+    ASSERT_EQ(CV_32F, d_c.depth());
+    ASSERT_EQ(1, d_c.channels());
+
+    cv::Mat c(d_c);
+    EXPECT_MAT_NEAR(a, c, rows * cols * 1e-5); // round trip is compared with the original input
+}
+
+TEST_P(Dft, R2CThenC2R) // round-trip dft checks over several sizes, out-of-place and with preallocated outputs
+{
+    PRINT_PARAM(devInfo);
+
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+    int cols = 2 + rng.next() % 100, rows = 2 + rng.next() % 100;
+
+    ASSERT_NO_THROW(
+        testR2CThenC2R("sanity", cols, rows, false); // in the hints, "a b" means cols + a and rows + b
+        testR2CThenC2R("sanity 0 1", cols, rows + 1, false);
+        testR2CThenC2R("sanity 1 0", cols + 1, rows, false);
+        testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, false);
+        testR2CThenC2R("single col", 1, rows, false);
+        testR2CThenC2R("single col 1", 1, rows + 1, false);
+        testR2CThenC2R("single row", cols, 1, false);
+        testR2CThenC2R("single row 1", cols + 1, 1, false);
+
+        testR2CThenC2R("sanity", cols, rows, true); // preallocated-output pass; note that it has no single-column cases
+        testR2CThenC2R("sanity 0 1", cols, rows + 1, true);
+        testR2CThenC2R("sanity 1 0", cols + 1, rows, true);
+        testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, true);
+        testR2CThenC2R("single row", cols, 1, true);
+        testR2CThenC2R("single row 1", cols + 1, 1, true);
+    );
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Dft, testing::ValuesIn(devices())); // one instance per entry returned by devices() (helper defined outside this chunk)
+
+////////////////////////////////////////////////////////////////////////////
+// blend
+
+// CPU reference for blendLinear: out = (img1 * w1 + img2 * w2) / (w1 + w2 + 1e-5f), one weight per pixel shared by all its channels.
+template <typename T> static void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& weights1, const cv::Mat& weights2, cv::Mat& result_gold)
+{
+    result_gold.create(img1.size(), img1.type());
+    const int channels = img1.channels();
+    const int rowLen = img1.cols * channels; // number of T elements in one row, counting every channel
+    for (int row = 0; row < img1.rows; ++row)
+    {
+        const T* a = img1.ptr<T>(row);
+        const T* b = img2.ptr<T>(row);
+        const float* wa = weights1.ptr<float>(row);
+        const float* wb = weights2.ptr<float>(row);
+        T* out = result_gold.ptr<T>(row);
+        for (int i = 0; i < rowLen; ++i)
+        {
+            const int px = i / channels; // pixel that element i belongs to; selects the weights
+            const float w1 = wa[px], w2 = wb[px];
+            out[i] = static_cast<T>((a[i] * w1 + b[i] * w2) / (w1 + w2 + 1e-5f));
+        }
+    }
+}
+
+struct Blend : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> > // tuple = (device, depth, channel count)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int depth;
+    int cn;
+
+    int type; // CV_MAKETYPE(depth, cn)
+    cv::Size size;
+    cv::Mat img1;
+    cv::Mat img2;
+    cv::Mat weights1; // CV_32F weight maps, one value per pixel
+    cv::Mat weights2;
+
+    cv::Mat result_gold; // reference produced by blendLinearGold
+
+    virtual void SetUp() 
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        depth = std::tr1::get<1>(GetParam());
+        cn = std::tr1::get<2>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // select the parameter device before any GPU call is made
+
+        type = CV_MAKETYPE(depth, cn);
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(200 + cvtest::randInt(rng) % 1000, 200 + cvtest::randInt(rng) % 1000);
+
+        img1 = cvtest::randomMat(rng, size, type, 0.0, depth == CV_8U ? 255.0 : 1.0, false); // upper argument is 255 for CV_8U and 1 otherwise
+        img2 = cvtest::randomMat(rng, size, type, 0.0, depth == CV_8U ? 255.0 : 1.0, false);
+        weights1 = cvtest::randomMat(rng, size, CV_32F, 0, 1, false);
+        weights2 = cvtest::randomMat(rng, size, CV_32F, 0, 1, false);
+        
+        if (depth == CV_8U) // choose the element type of the reference implementation to match the depth
+            blendLinearGold<uchar>(img1, img2, weights1, weights2, result_gold);
+        else
+            blendLinearGold<float>(img1, img2, weights1, weights2, result_gold);
+    }
+};
+
+TEST_P(Blend, Accuracy) // cv::gpu::blendLinear must match the blendLinearGold reference
+{
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+
+    cv::Mat result;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat d_result;
+
+        cv::gpu::blendLinear(cv::gpu::GpuMat(img1), cv::gpu::GpuMat(img2), cv::gpu::GpuMat(weights1), cv::gpu::GpuMat(weights2), d_result);
+
+        d_result.download(result);
+    );
+
+    EXPECT_MAT_NEAR(result_gold, result, depth == CV_8U ? 1.0 : 1e-5); // CV_8U results may differ by up to 1; other depths must agree within 1e-5
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Blend, testing::Combine(
+                        testing::ValuesIn(devices()),
+                        testing::Values(CV_8U, CV_32F), // depths; the fixture picks the uchar or float reference accordingly
+                        testing::Range(1, 5))); // channel counts 1, 2, 3 and 4 (the end of Range is exclusive)
+
+#endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_imgproc_gpu.cpp b/modules/gpu/test/test_imgproc_gpu.cpp
deleted file mode 100644
index d45d7c77bb..0000000000
--- a/modules/gpu/test/test_imgproc_gpu.cpp
+++ /dev/null
@@ -1,966 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include <cmath>
-#include <limits>
-#include "test_precomp.hpp"
-
-using namespace cv;
-using namespace std;
-using namespace gpu;
-
-class CV_GpuImageProcTest : public cvtest::BaseTest
-{
-public:
-    virtual ~CV_GpuImageProcTest() {}
-
-protected:
-    void run(int);
-
-    int test8UC1 (const Mat& img);
-    int test8UC4 (const Mat& img);
-    int test32SC1(const Mat& img);
-    int test32FC1(const Mat& img);
-
-    virtual int test(const Mat& img) = 0;
-
-    int CheckNorm(const Mat& m1, const Mat& m2);
-
-    // Checks whether two images are similar enough using normalized
-    // cross-correlation as an error measure
-    int CheckSimilarity(const Mat& m1, const Mat& m2, float max_err=1e-3f);
-};
-
-
-int CV_GpuImageProcTest::test8UC1(const Mat& img)
-{
-    cv::Mat img_C1;
-    cvtColor(img, img_C1, CV_BGR2GRAY);
-
-    return test(img_C1);
-}
-
-int CV_GpuImageProcTest::test8UC4(const Mat& img)
-{
-    cv::Mat img_C4;
-    cvtColor(img, img_C4, CV_BGR2BGRA);
-
-    return test(img_C4);
-}
-
-int CV_GpuImageProcTest::test32SC1(const Mat& img)
-{
-    cv::Mat img_C1;
-    cvtColor(img, img_C1, CV_BGR2GRAY);
-    img_C1.convertTo(img_C1, CV_32S);
-
-    return test(img_C1);
-}
-
-int CV_GpuImageProcTest::test32FC1(const Mat& img)
-{
-    cv::Mat temp, img_C1;
-    img.convertTo(temp, CV_32F, 1.f / 255.f);
-    cvtColor(temp, img_C1, CV_BGR2GRAY);
-
-    return test(img_C1);
-}
-
-int CV_GpuImageProcTest::CheckNorm(const Mat& m1, const Mat& m2)
-{
-    double ret = norm(m1, m2, NORM_INF);
-
-    if (ret < std::numeric_limits<double>::epsilon())
-    {
-        return cvtest::TS::OK;
-    }
-    else
-    {
-        ts->printf(cvtest::TS::LOG, "Norm: %f\n", ret);
-        return cvtest::TS::FAIL_GENERIC;
-    }
-}
-
-int CV_GpuImageProcTest::CheckSimilarity(const Mat& m1, const Mat& m2, float max_err)
-{
-    Mat diff;
-    cv::matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED);
-
-    float err = abs(diff.at<float>(0, 0) - 1.f);
-
-    if (err > max_err)
-        return cvtest::TS::FAIL_INVALID_OUTPUT;
-
-    return cvtest::TS::OK;
-}
-
-void CV_GpuImageProcTest::run( int )
-{
-    //load image
-    cv::Mat img = cv::imread(std::string(ts->get_data_path()) + "stereobp/aloe-L.png");
-
-    if (img.empty())
-    {
-        ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-        return;
-    }
-
-    int testResult = cvtest::TS::OK;
-    //run tests
-    ts->printf(cvtest::TS::LOG, "\n========Start test 8UC1========\n");
-    if (test8UC1(img) == cvtest::TS::OK)
-        ts->printf(cvtest::TS::LOG, "SUCCESS\n");
-    else
-    {
-        ts->printf(cvtest::TS::LOG, "FAIL\n");
-        testResult = cvtest::TS::FAIL_GENERIC;
-    }
-
-    ts->printf(cvtest::TS::LOG, "\n========Start test 8UC4========\n");
-    if (test8UC4(img) == cvtest::TS::OK)
-        ts->printf(cvtest::TS::LOG, "SUCCESS\n");
-    else
-    {
-        ts->printf(cvtest::TS::LOG, "FAIL\n");
-        testResult = cvtest::TS::FAIL_GENERIC;
-    }
-
-    ts->printf(cvtest::TS::LOG, "\n========Start test 32SC1========\n");
-    if (test32SC1(img) == cvtest::TS::OK)
-        ts->printf(cvtest::TS::LOG, "SUCCESS\n");
-    else
-    {
-        ts->printf(cvtest::TS::LOG, "FAIL\n");
-        testResult = cvtest::TS::FAIL_GENERIC;
-    }
-
-    ts->printf(cvtest::TS::LOG, "\n========Start test 32FC1========\n");
-    if (test32FC1(img) == cvtest::TS::OK)
-        ts->printf(cvtest::TS::LOG, "SUCCESS\n");
-    else
-    {
-        ts->printf(cvtest::TS::LOG, "FAIL\n");
-        testResult = cvtest::TS::FAIL_GENERIC;
-    }
-
-    ts->set_failed_test_info(testResult);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// threshold
-struct CV_GpuImageThresholdTest : public CV_GpuImageProcTest
-{
-public:
-    CV_GpuImageThresholdTest() {}
-
-    int test(const Mat& img)
-    {
-        if (img.type() != CV_8UC1 && img.type() != CV_32FC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\nUnsupported type\n");
-            return cvtest::TS::OK;
-        }
-
-        const double maxVal = img.type() == CV_8UC1 ? 255 : 1.0;
-
-        cv::RNG& rng = ts->get_rng();
-
-        int res = cvtest::TS::OK;
-
-        for (int type = THRESH_BINARY; type <= THRESH_TOZERO_INV; ++type)
-        {
-            const double thresh = rng.uniform(0.0, maxVal);
-
-            cv::Mat cpuRes;
-            cv::threshold(img, cpuRes, thresh, maxVal, type);
-
-            GpuMat gpu1(img);
-            GpuMat gpuRes;
-            cv::gpu::threshold(gpu1, gpuRes, thresh, maxVal, type);
-
-            if (CheckNorm(cpuRes, gpuRes) != cvtest::TS::OK)
-                res = cvtest::TS::FAIL_GENERIC;
-        }
-
-        return res;
-    }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-// resize
-struct CV_GpuNppImageResizeTest : public CV_GpuImageProcTest
-{
-    CV_GpuNppImageResizeTest() {}
-    int test(const Mat& img)
-    {
-        if (img.type() != CV_8UC1 && img.type() != CV_8UC4)
-        {
-            ts->printf(cvtest::TS::LOG, "Unsupported type\n");
-            return cvtest::TS::OK;
-        }
-
-        int interpolations[] = {INTER_NEAREST, INTER_LINEAR, /*INTER_CUBIC,*/ /*INTER_LANCZOS4*/};
-        const char* interpolations_str[] = {"INTER_NEAREST", "INTER_LINEAR", /*"INTER_CUBIC",*/ /*"INTER_LANCZOS4"*/};
-        int interpolations_num = sizeof(interpolations) / sizeof(int);
-
-        int test_res = cvtest::TS::OK;
-
-        for (int i = 0; i < interpolations_num; ++i)
-        {
-            ts->printf(cvtest::TS::LOG, "Interpolation: %s\n", interpolations_str[i]);
-
-            Mat cpu_res1, cpu_res2;
-            cv::resize(img, cpu_res1, Size(), 2.0, 2.0, interpolations[i]);
-            cv::resize(cpu_res1, cpu_res2, Size(), 0.5, 0.5, interpolations[i]);
-
-            GpuMat gpu1(img), gpu_res1, gpu_res2;
-            cv::gpu::resize(gpu1, gpu_res1, Size(), 2.0, 2.0, interpolations[i]);
-            cv::gpu::resize(gpu_res1, gpu_res2, Size(), 0.5, 0.5, interpolations[i]);
-
-            if (CheckSimilarity(cpu_res2, gpu_res2) != cvtest::TS::OK)
-                test_res = cvtest::TS::FAIL_GENERIC;
-        }
-
-        return test_res;
-    }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-// copyMakeBorder
-struct CV_GpuNppImageCopyMakeBorderTest : public CV_GpuImageProcTest
-{
-    CV_GpuNppImageCopyMakeBorderTest() {}
-
-    int test(const Mat& img)
-    {
-        if (img.type() != CV_8UC1 && img.type() != CV_8UC4 && img.type() != CV_32SC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\nUnsupported type\n");
-            return cvtest::TS::OK;
-        }
-
-        cv::RNG& rng = ts->get_rng();
-        int top = rng.uniform(1, 10);
-        int botton = rng.uniform(1, 10);
-        int left = rng.uniform(1, 10);
-        int right = rng.uniform(1, 10);
-        cv::Scalar val(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));
-
-        Mat cpudst;
-        cv::copyMakeBorder(img, cpudst, top, botton, left, right, BORDER_CONSTANT, val);
-
-        GpuMat gpu1(img);
-        GpuMat gpudst;
-        cv::gpu::copyMakeBorder(gpu1, gpudst, top, botton, left, right, val);
-
-        return CheckNorm(cpudst, gpudst);
-    }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-// warpAffine
-struct CV_GpuNppImageWarpAffineTest : public CV_GpuImageProcTest
-{
-    CV_GpuNppImageWarpAffineTest() {}
-
-    int test(const Mat& img)
-    {
-        if (img.type() == CV_32SC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\nUnsupported type\n");
-            return cvtest::TS::OK;
-        }
-
-        static double reflect[2][3] = { {-1, 0, 0},
-                                        { 0, -1, 0} };
-        reflect[0][2] = img.cols;
-        reflect[1][2] = img.rows;
-
-        Mat M(2, 3, CV_64F, (void*)reflect);
-
-        int flags[] = {INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_NEAREST | WARP_INVERSE_MAP, INTER_LINEAR | WARP_INVERSE_MAP, INTER_CUBIC | WARP_INVERSE_MAP};
-        const char* flags_str[] = {"INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_NEAREST | WARP_INVERSE_MAP", "INTER_LINEAR | WARP_INVERSE_MAP", "INTER_CUBIC | WARP_INVERSE_MAP"};
-        int flags_num = sizeof(flags) / sizeof(int);
-
-        int test_res = cvtest::TS::OK;
-
-        for (int i = 0; i < flags_num; ++i)
-        {
-            ts->printf(cvtest::TS::LOG, "\nFlags: %s\n", flags_str[i]);
-
-            Mat cpudst;
-            cv::warpAffine(img, cpudst, M, img.size(), flags[i]);
-
-            GpuMat gpu1(img);
-            GpuMat gpudst;
-            cv::gpu::warpAffine(gpu1, gpudst, M, gpu1.size(), flags[i]);
-
-            // Check inner parts (ignoring 1 pixel width border)
-            if (CheckSimilarity(cpudst.rowRange(1, cpudst.rows - 1).colRange(1, cpudst.cols - 1),
-                                gpudst.rowRange(1, gpudst.rows - 1).colRange(1, gpudst.cols - 1)) != cvtest::TS::OK)
-                test_res = cvtest::TS::FAIL_GENERIC;
-        }
-
-        return test_res;
-    }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-// warpPerspective
-struct CV_GpuNppImageWarpPerspectiveTest : public CV_GpuImageProcTest
-{
-    CV_GpuNppImageWarpPerspectiveTest() {}
-
-
-    int test(const Mat& img)
-    {
-        if (img.type() == CV_32SC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\nUnsupported type\n");
-            return cvtest::TS::OK;
-        }
-
-        static double reflect[3][3] = { { -1, 0, 0},
-                                        { 0, -1, 0},
-                                        { 0, 0, 1 }};
-        reflect[0][2] = img.cols;
-        reflect[1][2] = img.rows;
-        Mat M(3, 3, CV_64F, (void*)reflect);
-
-        int flags[] = {INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_NEAREST | WARP_INVERSE_MAP, INTER_LINEAR | WARP_INVERSE_MAP, INTER_CUBIC | WARP_INVERSE_MAP};
-        const char* flags_str[] = {"INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_NEAREST | WARP_INVERSE_MAP", "INTER_LINEAR | WARP_INVERSE_MAP", "INTER_CUBIC | WARP_INVERSE_MAP"};
-        int flags_num = sizeof(flags) / sizeof(int);
-
-        int test_res = cvtest::TS::OK;
-
-        for (int i = 0; i < flags_num; ++i)
-        {
-            ts->printf(cvtest::TS::LOG, "\nFlags: %s\n", flags_str[i]);
-
-            Mat cpudst;
-            cv::warpPerspective(img, cpudst, M, img.size(), flags[i]);
-
-            GpuMat gpu1(img);
-            GpuMat gpudst;
-            cv::gpu::warpPerspective(gpu1, gpudst, M, gpu1.size(), flags[i]);
-
-            // Check inner parts (ignoring 1 pixel width border)
-            if (CheckSimilarity(cpudst.rowRange(1, cpudst.rows - 1).colRange(1, cpudst.cols - 1),
-                                gpudst.rowRange(1, gpudst.rows - 1).colRange(1, gpudst.cols - 1)) != cvtest::TS::OK)
-                test_res = cvtest::TS::FAIL_GENERIC;
-        }
-
-        return test_res;
-    }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-// integral
-struct CV_GpuNppImageIntegralTest : public CV_GpuImageProcTest
-{
-    CV_GpuNppImageIntegralTest() {}
-
-    int test(const Mat& img)
-    {
-        if (img.type() != CV_8UC1)
-        {
-            ts->printf(cvtest::TS::LOG, "\nUnsupported type\n");
-            return cvtest::TS::OK;
-        }
-
-        Mat cpusum;
-        cv::integral(img, cpusum, CV_32S);
-
-        GpuMat gpu1(img);
-        GpuMat gpusum;
-        cv::gpu::integral(gpu1, gpusum);
-
-        return CheckNorm(cpusum, gpusum) == cvtest::TS::OK ? cvtest::TS::OK : cvtest::TS::FAIL_GENERIC;
-    }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-// Canny
-//struct CV_GpuNppImageCannyTest : public CV_GpuImageProcTest
-//{
-//    CV_GpuNppImageCannyTest() : CV_GpuImageProcTest( "GPU-NppImageCanny", "Canny" ) {}
-//
-//    int test(const Mat& img)
-//    {
-//        if (img.type() != CV_8UC1)
-//        {
-//            ts->printf(cvtest::TS::LOG, "\nUnsupported type\n");
-//            return cvtest::TS::OK;
-//        }
-//
-//        const double threshold1 = 1.0, threshold2 = 10.0;
-//
-//        Mat cpudst;
-//        cv::Canny(img, cpudst, threshold1, threshold2);
-//
-//        GpuMat gpu1(img);
-//        GpuMat gpudst;
-//        cv::gpu::Canny(gpu1, gpudst, threshold1, threshold2);
-//
-//        return CheckNorm(cpudst, gpudst);
-//    }
-//};
-
-////////////////////////////////////////////////////////////////////////////////
-// cvtColor
-class CV_GpuCvtColorTest : public cvtest::BaseTest
-{
-public:
-    CV_GpuCvtColorTest() {}
-    ~CV_GpuCvtColorTest() {};
-
-protected:
-    void run(int);
-
-    int CheckNorm(const Mat& m1, const Mat& m2);
-};
-
-
-int CV_GpuCvtColorTest::CheckNorm(const Mat& m1, const Mat& m2)
-{
-    float max_err = 1e-2f;
-
-    Mat diff;
-    cv::matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED);
-
-    float err = abs(diff.at<float>(0, 0) - 1.f);
-
-    if (err > max_err)
-        return cvtest::TS::FAIL_INVALID_OUTPUT;
-
-    return cvtest::TS::OK;
-}
-
-void CV_GpuCvtColorTest::run( int )
-{
-    cv::Mat img = cv::imread(std::string(ts->get_data_path()) + "stereobp/aloe-L.png");
-
-    if (img.empty())
-    {
-        ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-        return;
-    }
-
-    int testResult = cvtest::TS::OK;
-    cv::Mat cpuRes;
-    cv::gpu::GpuMat gpuImg(img), gpuRes;
-
-    int codes[] = { CV_BGR2RGB, CV_RGB2BGRA, CV_BGRA2RGB,
-                    CV_RGB2BGR555, CV_BGR5552BGR, CV_BGR2BGR565, CV_BGR5652RGB,
-                    CV_RGB2YCrCb, CV_YCrCb2BGR, CV_BGR2YUV, CV_YUV2RGB,
-                    CV_RGB2XYZ, CV_XYZ2BGR, CV_BGR2XYZ, CV_XYZ2RGB,
-                    CV_RGB2HSV, CV_HSV2BGR, CV_BGR2HSV_FULL, CV_HSV2RGB_FULL,
-                    CV_RGB2HLS, CV_HLS2BGR, CV_BGR2HLS_FULL, CV_HLS2RGB_FULL,
-                    CV_RGB2GRAY, CV_GRAY2BGRA, CV_BGRA2GRAY,
-                    CV_GRAY2BGR555, CV_BGR5552GRAY, CV_GRAY2BGR565, CV_BGR5652GRAY};
-    const char* codes_str[] = { "CV_BGR2RGB", "CV_RGB2BGRA", "CV_BGRA2RGB",
-                                "CV_RGB2BGR555", "CV_BGR5552BGR", "CV_BGR2BGR565", "CV_BGR5652RGB",
-                                "CV_RGB2YCrCb", "CV_YCrCb2BGR", "CV_BGR2YUV", "CV_YUV2RGB",
-                                "CV_RGB2XYZ", "CV_XYZ2BGR", "CV_BGR2XYZ", "CV_XYZ2RGB",
-                                "CV_RGB2HSV", "CV_HSV2RGB", "CV_BGR2HSV_FULL", "CV_HSV2RGB_FULL",
-                                "CV_RGB2HLS", "CV_HLS2RGB", "CV_BGR2HLS_FULL", "CV_HLS2RGB_FULL",
-                                "CV_RGB2GRAY", "CV_GRAY2BGRA", "CV_BGRA2GRAY",
-                                "CV_GRAY2BGR555", "CV_BGR5552GRAY", "CV_GRAY2BGR565", "CV_BGR5652GRAY"};
-    int codes_num = sizeof(codes) / sizeof(int);
-
-    for (int i = 0; i < codes_num; ++i)
-    {
-        ts->printf(cvtest::TS::LOG, "\n%s\n", codes_str[i]);
-
-        cv::cvtColor(img, cpuRes, codes[i]);
-        cv::gpu::cvtColor(gpuImg, gpuRes, codes[i]);
-
-        if (CheckNorm(cpuRes, gpuRes) == cvtest::TS::OK)
-            ts->printf(cvtest::TS::LOG, "\nSUCCESS\n");
-        else
-        {
-            ts->printf(cvtest::TS::LOG, "\nFAIL\n");
-            testResult = cvtest::TS::FAIL_GENERIC;
-        }
-
-        img = cpuRes;
-        gpuImg = gpuRes;
-    }
-
-    ts->set_failed_test_info(testResult);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Histograms
-class CV_GpuHistogramsTest : public cvtest::BaseTest
-{
-public:
-    CV_GpuHistogramsTest() {}
-    ~CV_GpuHistogramsTest() {};
-
-protected:
-    void run(int);
-
-    int CheckNorm(const Mat& m1, const Mat& m2)
-    {
-        double ret = norm(m1, m2, NORM_INF);
-
-        if (ret < std::numeric_limits<double>::epsilon())
-        {
-            return cvtest::TS::OK;
-        }
-        else
-        {
-            ts->printf(cvtest::TS::LOG, "\nNorm: %f\n", ret);
-            return cvtest::TS::FAIL_GENERIC;
-        }
-    }
-};
-
-void CV_GpuHistogramsTest::run( int )
-{
-    //load image
-    cv::Mat img = cv::imread(std::string(ts->get_data_path()) + "stereobp/aloe-L.png");
-
-    if (img.empty())
-    {
-        ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-        return;
-    }
-
-    Mat hsv;
-    cv::cvtColor(img, hsv, CV_BGR2HSV);
-
-    int hbins = 30;
-    int histSize[] = {hbins};
-
-    float hranges[] = {0, 180};
-    const float* ranges[] = {hranges};
-
-    MatND hist;
-
-    int channels[] = {0};
-    calcHist(&hsv, 1, channels, Mat(), hist, 1, histSize, ranges);
-
-    GpuMat gpuHsv(hsv);
-    std::vector<GpuMat> srcs;
-    cv::gpu::split(gpuHsv, srcs);
-    GpuMat gpuHist;
-    histEven(srcs[0], gpuHist, hbins, (int)hranges[0], (int)hranges[1]);
-
-    Mat cpuHist = hist;
-    cpuHist = cpuHist.t();
-    cpuHist.convertTo(cpuHist, CV_32S);
-
-    ts->set_failed_test_info(CheckNorm(cpuHist, gpuHist));
-}
-
-////////////////////////////////////////////////////////////////////////
-// Corner Harris feature detector
-
-struct CV_GpuCornerHarrisTest: cvtest::BaseTest
-{
-    CV_GpuCornerHarrisTest() {}
-
-    void run(int)
-    {
-        for (int i = 0; i < 5; ++i)
-        {
-            int rows = 25 + rand() % 300, cols = 25 + rand() % 300;
-            if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return;
-            if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, -1)) return;
-            if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return;
-            if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, -1)) return;
-        }
-    }
-
-    bool compareToCpuTest(int rows, int cols, int depth, int blockSize, int apertureSize)
-    {
-        RNG rng;
-        cv::Mat src(rows, cols, depth);
-        if (depth == CV_32F)
-            rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(1));
-        else if (depth == CV_8U)
-            rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));
-
-        double k = 0.1;
-
-        cv::Mat dst_gold;
-        cv::gpu::GpuMat dst;
-        cv::Mat dsth;
-        int borderType;
-
-        borderType = BORDER_REFLECT101;
-        cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);
-        cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType);
-
-        dsth = dst;
-        for (int i = 0; i < dst.rows; ++i)
-        {
-            for (int j = 0; j < dst.cols; ++j)
-            {
-                float a = dst_gold.at<float>(i, j);
-                float b = dsth.at<float>(i, j);
-                if (fabs(a - b) > 1e-3f)
-                {
-                    ts->printf(cvtest::TS::CONSOLE, "%d %d %f %f %d\n", i, j, a, b, apertureSize);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return false;
-                };
-            }
-        }
-
-        borderType = BORDER_REPLICATE;
-        cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);
-        cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType);
-
-        dsth = dst;
-        for (int i = 0; i < dst.rows; ++i)
-        {
-            for (int j = 0; j < dst.cols; ++j)
-            {
-                float a = dst_gold.at<float>(i, j);
-                float b = dsth.at<float>(i, j);
-                if (fabs(a - b) > 1e-3f)
-                {
-                    ts->printf(cvtest::TS::CONSOLE, "%d %d %f %f %d\n", i, j, a, b, apertureSize);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return false;
-                };
-            }
-        }
-        return true;
-    }
-};
-
-////////////////////////////////////////////////////////////////////////
-// Corner Min Eigen Val
-
-struct CV_GpuCornerMinEigenValTest: cvtest::BaseTest
-{
-    CV_GpuCornerMinEigenValTest() {}
-
-    void run(int)
-    {
-        for (int i = 0; i < 3; ++i)
-        {
-            int rows = 25 + rand() % 300, cols = 25 + rand() % 300;
-            if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, -1)) return;
-            if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return;
-            if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, -1)) return;
-            if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return;
-        }
-    }
-
-    bool compareToCpuTest(int rows, int cols, int depth, int blockSize, int apertureSize)
-    {
-        RNG rng;
-        cv::Mat src(rows, cols, depth);
-        if (depth == CV_32F)
-            rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(1));
-        else if (depth == CV_8U)
-            rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));
-
-        cv::Mat dst_gold;
-        cv::gpu::GpuMat dst;
-        cv::Mat dsth;
-
-        int borderType;
-
-        borderType = BORDER_REFLECT101;
-        cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);
-        cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType);
-
-        dsth = dst;
-        for (int i = 0; i < dst.rows; ++i)
-        {
-            for (int j = 0; j < dst.cols; ++j)
-            {
-                float a = dst_gold.at<float>(i, j);
-                float b = dsth.at<float>(i, j);
-                if (fabs(a - b) > 1e-2f)
-                {
-                    ts->printf(cvtest::TS::CONSOLE, "%d %d %f %f %d %d\n", i, j, a, b, apertureSize, blockSize);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return false;
-                };
-            }
-        }
-
-        borderType = BORDER_REPLICATE;
-        cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);
-        cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType);
-
-        dsth = dst;
-        for (int i = 0; i < dst.rows; ++i)
-        {
-            for (int j = 0; j < dst.cols; ++j)
-            {
-                float a = dst_gold.at<float>(i, j);
-                float b = dsth.at<float>(i, j);
-                if (fabs(a - b) > 1e-2f)
-                {
-                    ts->printf(cvtest::TS::CONSOLE, "%d %d %f %f %d %d\n", i, j, a, b, apertureSize, blockSize);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return false;
-                };
-            }
-        }
-
-        return true;
-    }
-};
-
-struct CV_GpuColumnSumTest: cvtest::BaseTest
-{
-    CV_GpuColumnSumTest() {}
-
-    void run(int)
-    {
-        int cols = 375;
-        int rows = 1072;
-
-        Mat src(rows, cols, CV_32F);
-        RNG rng(1);
-        rng.fill(src, RNG::UNIFORM, Scalar(0), Scalar(1));
-
-        GpuMat d_dst;
-        columnSum(GpuMat(src), d_dst);
-
-        Mat dst = d_dst;
-        for (int j = 0; j < src.cols; ++j)
-        {
-            float a = src.at<float>(0, j);
-            float b = dst.at<float>(0, j);
-            if (fabs(a - b) > 0.5f)
-            {
-                ts->printf(cvtest::TS::CONSOLE, "big diff at %d %d: %f %f\n", 0, j, a, b);
-                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                return;
-            }
-        }
-        for (int i = 1; i < src.rows; ++i)
-        {
-            for (int j = 0; j < src.cols; ++j)
-            {
-                float a = src.at<float>(i, j) += src.at<float>(i - 1, j);
-                float b = dst.at<float>(i, j);
-                if (fabs(a - b) > 0.5f)
-                {
-                    ts->printf(cvtest::TS::CONSOLE, "big diff at %d %d: %f %f\n", i, j, a, b);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return;
-                }
-            }
-        }
-    }
-};
-
-struct CV_GpuNormTest : cvtest::BaseTest
-{
-    CV_GpuNormTest() {}
-
-    void run(int)
-    {
-        RNG rng(0);
-
-        int rows = rng.uniform(1, 500);
-        int cols = rng.uniform(1, 500);
-
-        for (int cn = 1; cn <= 4; ++cn)
-        {
-            test(NORM_L1, rows, cols, CV_8U, cn, Scalar::all(0), Scalar::all(10));
-            test(NORM_L1, rows, cols, CV_8S, cn, Scalar::all(-10), Scalar::all(10));
-            test(NORM_L1, rows, cols, CV_16U, cn, Scalar::all(0), Scalar::all(10));
-            test(NORM_L1, rows, cols, CV_16S, cn, Scalar::all(-10), Scalar::all(10));
-            test(NORM_L1, rows, cols, CV_32S, cn, Scalar::all(-10), Scalar::all(10));
-            test(NORM_L1, rows, cols, CV_32F, cn, Scalar::all(0), Scalar::all(1));
-
-            test(NORM_L2, rows, cols, CV_8U, cn, Scalar::all(0), Scalar::all(10));
-            test(NORM_L2, rows, cols, CV_8S, cn, Scalar::all(-10), Scalar::all(10));
-            test(NORM_L2, rows, cols, CV_16U, cn, Scalar::all(0), Scalar::all(10));
-            test(NORM_L2, rows, cols, CV_16S, cn, Scalar::all(-10), Scalar::all(10));
-            test(NORM_L2, rows, cols, CV_32S, cn, Scalar::all(-10), Scalar::all(10));
-            test(NORM_L2, rows, cols, CV_32F, cn, Scalar::all(0), Scalar::all(1));
-
-            test(NORM_INF, rows, cols, CV_8U, cn, Scalar::all(0), Scalar::all(10));
-            test(NORM_INF, rows, cols, CV_8S, cn, Scalar::all(-10), Scalar::all(10));
-            test(NORM_INF, rows, cols, CV_16U, cn, Scalar::all(0), Scalar::all(10));
-            test(NORM_INF, rows, cols, CV_16S, cn, Scalar::all(-10), Scalar::all(10));
-            test(NORM_INF, rows, cols, CV_32S, cn, Scalar::all(-10), Scalar::all(10));
-            test(NORM_INF, rows, cols, CV_32F, cn, Scalar::all(0), Scalar::all(1));
-        }
-    }
-
-    void gen(Mat& mat, int rows, int cols, int type, Scalar low, Scalar high)
-    {
-        mat.create(rows, cols, type);
-        RNG rng(0);
-        rng.fill(mat, RNG::UNIFORM, low, high);
-    }
-
-    void test(int norm_type, int rows, int cols, int depth, int cn, Scalar low, Scalar high)
-    {
-        int type = CV_MAKE_TYPE(depth, cn);
-
-        Mat src;
-        gen(src, rows, cols, type, low, high);
-
-        double gold = norm(src, norm_type);
-        double mine = norm(GpuMat(src), norm_type);
-
-        if (abs(gold - mine) > 1e-3)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "failed test: gold=%f, mine=%f, norm_type=%d, rows=%d, "
-                       "cols=%d, depth=%d, cn=%d\n", gold, mine, norm_type, rows, cols, depth, cn);
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-        }
-    }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-// reprojectImageTo3D
-class CV_GpuReprojectImageTo3DTest : public cvtest::BaseTest
-{
-public:
-    CV_GpuReprojectImageTo3DTest() {}
-
-protected:
-    void run(int)
-    {
-        Mat disp(320, 240, CV_8UC1);
-
-        RNG& rng = ts->get_rng();
-        rng.fill(disp, RNG::UNIFORM, Scalar(5), Scalar(30));
-
-        Mat Q(4, 4, CV_32FC1);
-        rng.fill(Q, RNG::UNIFORM, Scalar(0.1), Scalar(1));
-
-        Mat cpures;
-        GpuMat gpures;
-
-        reprojectImageTo3D(disp, cpures, Q, false);
-        reprojectImageTo3D(GpuMat(disp), gpures, Q);
-
-        Mat temp = gpures;
-
-        for (int y = 0; y < cpures.rows; ++y)
-        {
-            const Vec3f* cpu_row = cpures.ptr<Vec3f>(y);
-            const Vec4f* gpu_row = temp.ptr<Vec4f>(y);
-            for (int x = 0; x < cpures.cols; ++x)
-            {
-                Vec3f a = cpu_row[x];
-                Vec4f b = gpu_row[x];
-
-                if (fabs(a[0] - b[0]) > 1e-5 || fabs(a[1] - b[1]) > 1e-5 || fabs(a[2] - b[2]) > 1e-5)
-                {
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return;
-                }
-            }
-        }
-    }
-};
-
-TEST(threshold, accuracy) { CV_GpuImageThresholdTest test; test.safe_run(); }
-TEST(resize, accuracy) { CV_GpuNppImageResizeTest test; test.safe_run(); }
-TEST(copyMakeBorder, accuracy) { CV_GpuNppImageCopyMakeBorderTest test; test.safe_run(); }
-TEST(warpAffine, accuracy) { CV_GpuNppImageWarpAffineTest test; test.safe_run(); }
-TEST(warpPerspective, accuracy) { CV_GpuNppImageWarpPerspectiveTest test; test.safe_run(); }
-TEST(integral, accuracy) { CV_GpuNppImageIntegralTest test; test.safe_run(); }
-TEST(cvtColor, accuracy) { CV_GpuCvtColorTest test; test.safe_run(); }
-TEST(histograms, accuracy) { CV_GpuHistogramsTest test; test.safe_run(); }
-TEST(cornerHearris, accuracy) { CV_GpuCornerHarrisTest test; test.safe_run(); }
-TEST(minEigen, accuracy) { CV_GpuCornerMinEigenValTest test; test.safe_run(); }
-TEST(columnSum, accuracy) { CV_GpuColumnSumTest test; test.safe_run(); }
-TEST(norm, accuracy) { CV_GpuNormTest test; test.safe_run(); }
-TEST(reprojectImageTo3D, accuracy) { CV_GpuReprojectImageTo3DTest test; test.safe_run(); }
-
-TEST(downsample, accuracy_on_8U)
-{
-    RNG& rng = cvtest::TS::ptr()->get_rng();
-    Size size(200 + cvtest::randInt(rng) % 1000, 200 + cvtest::randInt(rng) % 1000);
-    Mat src = cvtest::randomMat(rng, size, CV_8U, 0, 255, false);
-
-    for (int k = 2; k <= 5; ++k)
-    {
-        GpuMat d_dst;
-        downsample(GpuMat(src), d_dst, k);
-
-        Size dst_gold_size((src.cols + k - 1) / k, (src.rows + k - 1) / k);
-        ASSERT_EQ(dst_gold_size.width, d_dst.cols)
-            << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
-        ASSERT_EQ(dst_gold_size.height, d_dst.rows)
-            << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
-
-        Mat dst = d_dst;
-        for (int y = 0; y < dst.rows; ++y)
-            for (int x = 0; x < dst.cols; ++x)
-                ASSERT_EQ(src.at<uchar>(y * k, x * k), dst.at<uchar>(y, x))
-                    << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
-    }
-}
-
-TEST(downsample, accuracy_on_32F)
-{
-    RNG& rng = cvtest::TS::ptr()->get_rng();
-    Size size(200 + cvtest::randInt(rng) % 1000, 200 + cvtest::randInt(rng) % 1000);
-    Mat src = cvtest::randomMat(rng, size, CV_32F, 0, 1, false);
-
-    for (int k = 2; k <= 5; ++k)
-    {
-        GpuMat d_dst;
-        downsample(GpuMat(src), d_dst, k);
-
-        Size dst_gold_size((src.cols + k - 1) / k, (src.rows + k - 1) / k);
-        ASSERT_EQ(dst_gold_size.width, d_dst.cols)
-            << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
-        ASSERT_EQ(dst_gold_size.height, d_dst.rows)
-            << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
-
-        Mat dst = d_dst;
-        for (int y = 0; y < dst.rows; ++y)
-            for (int x = 0; x < dst.cols; ++x)
-                ASSERT_FLOAT_EQ(src.at<float>(y * k, x * k), dst.at<float>(y, x))
-                    << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
-    }
-}
diff --git a/modules/gpu/test/test_main.cpp b/modules/gpu/test/test_main.cpp
index 89c1b272bc..3a2df605e8 100644
--- a/modules/gpu/test/test_main.cpp
+++ b/modules/gpu/test/test_main.cpp
@@ -1,13 +1,107 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
 #include "test_precomp.hpp"
 
-int main(int argc, char **argv)
+#ifdef HAVE_CUDA
+
+// Lists each CUDA device (name, compute capability, memory, compatibility), then the archs this build targets.
+void print_info()
+{
+    const int deviceCount = cv::gpu::getCudaEnabledDeviceCount();
+    printf("Found %d CUDA devices\n\n", deviceCount);
+    for (int i = 0; i < deviceCount; ++i)
+    {
+        cv::gpu::DeviceInfo info(i);
+        printf("Device %d:\n", i);
+        printf("\tName: %s\n", info.name().c_str());
+        printf("\tCompute capability version: %d.%d\n", info.majorVersion(), info.minorVersion());
+        // Integer division on the byte count: no double round-trip, no int overflow on an intermediate KiB value.
+        printf("\tTotal memory: %d Mb\n", static_cast<int>(info.totalMemory() / (1024 * 1024)));
+        printf("\tFree memory: %d Mb\n", static_cast<int>(info.freeMemory() / (1024 * 1024)));
+        if (!info.isCompatible())
+            printf("\tThis device is not compatible with current GPU module build\n");
+        printf("\n");
+    }
+
+    printf("GPU module was compiled for next GPU archs:\n");
+    printf("\tBIN:%s\n", CUDA_ARCH_BIN);
+    printf("\tPTX:%s\n\n", CUDA_ARCH_PTX);
+}
+
+enum OutputLevel // value stored in nvidiaTestOutputLevel; main() picks it from "nvtest_output_level"
+{
+    OutputLevelNone,    // selected when the option string is "none"
+    OutputLevelCompact, // selected when the option string is "compact"
+    OutputLevelFull     // selected when the option string is "full"
+};
+
+extern OutputLevel nvidiaTestOutputLevel;
+
+int main(int argc, char** argv)
 {
     cvtest::TS::ptr()->init("gpu");
-    ::testing::InitGoogleTest(&argc, argv);
-#ifdef HAVE_CUDA
+    testing::InitGoogleTest(&argc, argv);
+    // Hand the command line to OpenCV's parser so the NVIDIA test verbosity option can be read.
+    cv::CommandLineParser parser(argc, (const char**)argv);
+    // Reads "nvtest_output_level"; "none" is the second argument (presumably the default when absent - confirm).
+    std::string outputLevel = parser.get<std::string>("nvtest_output_level", "none");
+    // Map the string to the enum; there is no final else, so any other value leaves nvidiaTestOutputLevel as is.
+    if (outputLevel == "none")
+        nvidiaTestOutputLevel = OutputLevelNone;
+    else if (outputLevel == "compact")
+        nvidiaTestOutputLevel = OutputLevelCompact;
+    else if (outputLevel == "full")
+        nvidiaTestOutputLevel = OutputLevelFull;
+    // Print the device/build summary before the tests run.
+    print_info();
     return RUN_ALL_TESTS();
-#else
-    std::cerr << "opencv_test_gpu: OpenCV was compiled without GPU support\n";
-    return -1;
-#endif
-}
\ No newline at end of file
+}
+
+#else // HAVE_CUDA
+
+int main(int, char**) // parameters left unnamed: this stub ignores the command line (avoids unused-parameter warnings)
+{
+    printf("OpenCV was built without CUDA support\n"); // nothing to test; report it and exit successfully
+    return 0;
+}
+
+#endif // HAVE_CUDA
\ No newline at end of file
diff --git a/modules/gpu/test/test_match_template.cpp b/modules/gpu/test/test_match_template.cpp
deleted file mode 100644
index b646723051..0000000000
--- a/modules/gpu/test/test_match_template.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other GpuMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or bpied warranties, including, but not limited to, the bpied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-#include <string>
-#include <iostream>
-
-//#define SHOW_TIME
-
-#ifdef SHOW_TIME
-#include <ctime>
-#define F(x) x
-#else
-#define F(x)
-#endif
-
-using namespace cv;
-using namespace std;
-
-struct CV_GpuMatchTemplateTest: cvtest::BaseTest 
-{
-    CV_GpuMatchTemplateTest() {}
-
-    void run(int)
-    {
-        bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) &&
-                         gpu::DeviceInfo().supports(gpu::NATIVE_DOUBLE);
-        if (!double_ok)
-        {
-            // For sqrIntegral
-            ts->printf(cvtest::TS::CONSOLE, "\nCode and device double support is required (CC >= 1.3)");
-            ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-            return;
-        }
-
-        Mat image, templ;
-        Mat dst_gold;
-        gpu::GpuMat dst;
-        int n, m, h, w;
-        F(clock_t t;)
-
-        RNG& rng = ts->get_rng();
-
-        for (int cn = 1; cn <= 4; ++cn)
-        {
-            F(ts->printf(cvtest::TS::CONSOLE, "cn: %d\n", cn);)
-            for (int i = 0; i <= 0; ++i)
-            {
-                n = rng.uniform(30, 100);
-                m = rng.uniform(30, 100);
-                h = rng.uniform(5, n - 1);
-                w = rng.uniform(5, m - 1);
-
-                gen(image, n, m, CV_8U, cn);
-                gen(templ, h, w, CV_8U, cn);
-                F(t = clock();)
-                matchTemplate(image, templ, dst_gold, CV_TM_SQDIFF);
-                F(cout << "depth: 8U cn: " << cn << " n: " << n << " m: " << m << " w: " << w << " h: " << h << endl;)
-                F(cout << "cpu:" << clock() - t << endl;)
-                F(t = clock();)
-                gpu::matchTemplate(gpu::GpuMat(image), gpu::GpuMat(templ), dst, CV_TM_SQDIFF);
-                F(cout << "gpu_block: " << clock() - t << endl;)
-                if (!check(dst_gold, Mat(dst), 5 * h * w * 1e-4f, "SQDIFF 8U")) return;
-
-                gen(image, n, m, CV_8U, cn);
-                gen(templ, h, w, CV_8U, cn);
-                F(t = clock();)
-                matchTemplate(image, templ, dst_gold, CV_TM_SQDIFF_NORMED);
-                F(cout << "depth: 8U cn: " << cn << " n: " << n << " m: " << m << " w: " << w << " h: " << h << endl;)
-                F(cout << "cpu:" << clock() - t << endl;)
-                F(t = clock();)
-                gpu::matchTemplate(gpu::GpuMat(image), gpu::GpuMat(templ), dst, CV_TM_SQDIFF_NORMED);
-                F(cout << "gpu_block: " << clock() - t << endl;)
-                if (!check(dst_gold, Mat(dst), h * w * 1e-5f, "SQDIFF_NOREMD 8U")) return;
-
-                gen(image, n, m, CV_8U, cn);
-                gen(templ, h, w, CV_8U, cn);
-                F(t = clock();)
-                matchTemplate(image, templ, dst_gold, CV_TM_CCORR);
-                F(cout << "depth: 8U cn: " << cn << " n: " << n << " m: " << m << " w: " << w << " h: " << h << endl;)
-                F(cout << "cpu:" << clock() - t << endl;)
-                F(t = clock();)
-                gpu::matchTemplate(gpu::GpuMat(image), gpu::GpuMat(templ), dst, CV_TM_CCORR);
-                F(cout << "gpu_block: " << clock() - t << endl;)
-                if (!check(dst_gold, Mat(dst), 5 * h * w * cn * cn * 1e-5f, "CCORR 8U")) return;
-
-                gen(image, n, m, CV_8U, cn);
-                gen(templ, h, w, CV_8U, cn);
-                F(t = clock();)
-                matchTemplate(image, templ, dst_gold, CV_TM_CCORR_NORMED);
-                F(cout << "depth: 8U cn: " << cn << " n: " << n << " m: " << m << " w: " << w << " h: " << h << endl;)
-                F(cout << "cpu:" << clock() - t << endl;)
-                F(t = clock();)
-                gpu::matchTemplate(gpu::GpuMat(image), gpu::GpuMat(templ), dst, CV_TM_CCORR_NORMED);
-                F(cout << "gpu_block: " << clock() - t << endl;)
-                if (!check(dst_gold, Mat(dst), h * w * 1e-6f, "CCORR_NORMED 8U")) return;
-
-                gen(image, n, m, CV_8U, cn);
-                gen(templ, h, w, CV_8U, cn);
-                F(t = clock();)
-                matchTemplate(image, templ, dst_gold, CV_TM_CCOEFF);
-                F(cout << "depth: 8U cn: " << cn << " n: " << n << " m: " << m << " w: " << w << " h: " << h << endl;)
-                F(cout << "cpu:" << clock() - t << endl;)
-                F(t = clock();)
-                gpu::matchTemplate(gpu::GpuMat(image), gpu::GpuMat(templ), dst, CV_TM_CCOEFF);
-                F(cout << "gpu_block: " << clock() - t << endl;)
-                if (!check(dst_gold, Mat(dst), 5 * h * w * cn * cn * 1e-5f, "CCOEFF 8U")) return;
-
-                gen(image, n, m, CV_8U, cn);
-                gen(templ, h, w, CV_8U, cn);
-                F(t = clock();)
-                matchTemplate(image, templ, dst_gold, CV_TM_CCOEFF_NORMED);
-                F(cout << "depth: 8U cn: " << cn << " n: " << n << " m: " << m << " w: " << w << " h: " << h << endl;)
-                F(cout << "cpu:" << clock() - t << endl;)
-                F(t = clock();)
-                gpu::matchTemplate(gpu::GpuMat(image), gpu::GpuMat(templ), dst, CV_TM_CCOEFF_NORMED);
-                F(cout << "gpu_block: " << clock() - t << endl;)
-                if (!check(dst_gold, Mat(dst), h * w * 1e-6f, "CCOEFF_NORMED 8U")) return;
-
-                gen(image, n, m, CV_32F, cn);
-                gen(templ, h, w, CV_32F, cn);
-                F(t = clock();)
-                matchTemplate(image, templ, dst_gold, CV_TM_SQDIFF);
-                F(cout << "depth: 32F cn: " << cn << " n: " << n << " m: " << m << " w: " << w << " h: " << h << endl;)
-                F(cout << "cpu:" << clock() - t << endl;)
-                F(t = clock();)
-                gpu::matchTemplate(gpu::GpuMat(image), gpu::GpuMat(templ), dst, CV_TM_SQDIFF);
-                F(cout << "gpu_block: " << clock() - t << endl;)
-                if (!check(dst_gold, Mat(dst), 0.25f * h * w * 1e-5f, "SQDIFF 32F")) return;
-
-                gen(image, n, m, CV_32F, cn);
-                gen(templ, h, w, CV_32F, cn);
-                F(t = clock();)
-                matchTemplate(image, templ, dst_gold, CV_TM_CCORR);
-                F(cout << "depth: 32F cn: " << cn << " n: " << n << " m: " << m << " w: " << w << " h: " << h << endl;)
-                F(cout << "cpu:" << clock() - t << endl;)
-                F(t = clock();)
-                gpu::matchTemplate(gpu::GpuMat(image), gpu::GpuMat(templ), dst, CV_TM_CCORR);
-                F(cout << "gpu_block: " << clock() - t << endl;)
-                if (!check(dst_gold, Mat(dst), 0.25f * h * w * 1e-5f, "CCORR 32F")) return;
-            }
-        }
-    }
-
-    void gen(Mat& a, int rows, int cols, int depth, int cn)
-    {
-        RNG rng;
-        a.create(rows, cols, CV_MAKETYPE(depth, cn));
-        if (depth == CV_8U)
-            rng.fill(a, RNG::UNIFORM, Scalar::all(1), Scalar::all(10));
-        else if (depth == CV_32F)
-            rng.fill(a, RNG::UNIFORM, Scalar::all(0.001f), Scalar::all(1.f));
-    }
-
-    bool check(const Mat& a, const Mat& b, float max_err, const string& method="")
-    {
-        if (a.size() != b.size())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad size, method=%s\n", method.c_str());
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return false;
-        }
-
-        //for (int i = 0; i < a.rows; ++i)
-        //{
-        //    for (int j = 0; j < a.cols; ++j)
-        //    {
-        //        float a_ = a.at<float>(i, j);
-        //        float b_ = b.at<float>(i, j);
-        //        if (fabs(a_ - b_) > max_err)
-        //        {
-        //            ts->printf(cvtest::TS::CONSOLE, "a=%f, b=%f, i=%d, j=%d\n", a_, b_, i, j);
-        //            cin.get();
-        //        }
-        //    }
-        //}
-
-        float err = (float)norm(a, b, NORM_INF);
-        if (err > max_err)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad accuracy: %f, method=%s\n", err, method.c_str());
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return false;
-        }
-
-        return true;
-    }
-};
-
-TEST(matchTemplate, accuracy) { CV_GpuMatchTemplateTest test; test.safe_run(); }
-
-struct CV_GpuMatchTemplateFindPatternInBlackTest: cvtest::BaseTest 
-{
-    CV_GpuMatchTemplateFindPatternInBlackTest() {}
-
-    void run(int)
-    {
-        bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) &&
-                         gpu::DeviceInfo().supports(gpu::NATIVE_DOUBLE);
-        if (!double_ok)
-        {
-            // For sqrIntegral
-            ts->printf(cvtest::TS::CONSOLE, "\nCode and device double support is required (CC >= 1.3)");
-            ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-            return;
-        }
-
-        Mat image = imread(std::string(ts->get_data_path()) + "matchtemplate/black.png");
-        if (image.empty())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "can't open file '%s'", (std::string(ts->get_data_path())
-                                                               + "matchtemplate/black.png").c_str());
-            ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-            return;
-        }
-
-        Mat pattern = imread(std::string(ts->get_data_path()) + "matchtemplate/cat.png");
-        if (pattern.empty())
-        {
-            ts->printf(cvtest::TS::CONSOLE, "can't open file '%s'", (std::string(ts->get_data_path())
-                                                               + "matchtemplate/cat.png").c_str());
-            ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-            return;
-        }
-
-        gpu::GpuMat d_image(image);
-        gpu::GpuMat d_pattern(pattern);
-        gpu::GpuMat d_result;
-
-        double maxValue;
-        Point maxLoc;
-        Point maxLocGold(284, 12);
-
-        gpu::matchTemplate(d_image, d_pattern, d_result, CV_TM_CCOEFF_NORMED);
-        gpu::minMaxLoc(d_result, NULL, &maxValue, NULL, &maxLoc );
-        if (maxLoc != maxLocGold)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad match (CV_TM_CCOEFF_NORMED): %d %d, must be at: %d %d",
-                       maxLoc.x, maxLoc.y, maxLocGold.x, maxLocGold.y);
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return;
-        }
-
-        gpu::matchTemplate(d_image, d_pattern, d_result, CV_TM_CCORR_NORMED);
-        gpu::minMaxLoc(d_result, NULL, &maxValue, NULL, &maxLoc );
-        if (maxLoc != maxLocGold)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "bad match (CV_TM_CCORR_NORMED): %d %d, must be at: %d %d",
-                       maxLoc.x, maxLoc.y, maxLocGold.x, maxLocGold.y);
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return;
-        }
-    }
-};
-
-TEST(matchTemplate, find_pattern_in_black) { CV_GpuMatchTemplateFindPatternInBlackTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_matop.cpp b/modules/gpu/test/test_matop.cpp
new file mode 100644
index 0000000000..470306d515
--- /dev/null
+++ b/modules/gpu/test/test_matop.cpp
@@ -0,0 +1,614 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+////////////////////////////////////////////////////////////////////////////////
+// merge
+
+struct Merge : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // fixture for cv::gpu::merge, parameterized by (device, matrix type)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int type;
+
+    cv::Size size;
+    std::vector<cv::Mat> src; // one single-channel plane per channel of `type`
+
+    cv::Mat dst_gold; // reference result computed on the CPU by cv::merge
+
+    virtual void SetUp() 
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // make the device under test the current one
+        
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); // random width and height, each drawn with rng.uniform(20, 150)
+
+        int depth = CV_MAT_DEPTH(type);
+        int num_channels = CV_MAT_CN(type);
+        src.reserve(num_channels);
+        for (int i = 0; i < num_channels; ++i)
+            src.push_back(cv::Mat(size, depth, cv::Scalar::all(i))); // plane i is filled with the constant i
+
+        cv::merge(src, dst_gold);
+    }
+};
+
+TEST_P(Merge, Accuracy) // compares cv::gpu::merge against the cv::merge result prepared in SetUp
+{
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // 64F types are skipped when supportFeature() reports no NATIVE_DOUBLE; the test then ends without a failure
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        std::vector<cv::gpu::GpuMat> dev_src;
+        cv::gpu::GpuMat dev_dst;
+
+        for (size_t i = 0; i < src.size(); ++i)
+            dev_src.push_back(cv::gpu::GpuMat(src[i])); // one GpuMat per source plane
+
+        cv::gpu::merge(dev_src, dev_dst); 
+
+        dev_dst.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // NOTE(review): third argument is presumably the allowed difference, so 0.0 demands an exact match - confirm against the macro
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, Merge, testing::Combine( // Combine yields every (device, type) pair from the two lists
+                        testing::ValuesIn(devices()), 
+                        testing::ValuesIn(all_types()))); // devices() and all_types() are defined outside this file - TODO confirm what they enumerate
+
+////////////////////////////////////////////////////////////////////////////////
+// split
+
+struct Split : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // fixture for cv::gpu::split, parameterized by (device, matrix type)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int type;
+
+    cv::Size size;
+    cv::Mat src;
+
+    std::vector<cv::Mat> dst_gold; // reference planes computed on the CPU by cv::split
+
+    virtual void SetUp() 
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // make the device under test the current one
+        
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); // random width and height, each drawn with rng.uniform(20, 150)
+
+        src.create(size, type);
+        src.setTo(cv::Scalar(1.0, 2.0, 3.0, 4.0)); // a different constant per channel, so the planes can be told apart
+        cv::split(src, dst_gold);
+    }
+};
+
+TEST_P(Split, Accuracy) // compares every plane from cv::gpu::split with the cv::split planes prepared in SetUp
+{
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // 64F types are skipped when supportFeature() reports no NATIVE_DOUBLE; the test then ends without a failure
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+
+    std::vector<cv::Mat> dst;
+    
+    ASSERT_NO_THROW(
+        std::vector<cv::gpu::GpuMat> dev_dst;
+        cv::gpu::split(cv::gpu::GpuMat(src), dev_dst);
+
+        dst.resize(dev_dst.size());
+        for (size_t i = 0; i < dev_dst.size(); ++i)
+            dev_dst[i].download(dst[i]); // bring each device plane back to the host
+    );
+
+    ASSERT_EQ(dst_gold.size(), dst.size()); // fatal on mismatch, which protects the indexing in the loop below
+
+    for (size_t i = 0; i < dst_gold.size(); ++i)
+    {
+        EXPECT_MAT_NEAR(dst_gold[i], dst[i], 0.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, Split, testing::Combine( // Combine yields every (device, type) pair from the two lists
+                        testing::ValuesIn(devices()), 
+                        testing::ValuesIn(all_types()))); // devices() and all_types() are defined outside this file - TODO confirm what they enumerate
+
+////////////////////////////////////////////////////////////////////////////////
+// split_merge_consistency
+
+struct SplitMerge : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // fixture for the split-then-merge round trip, parameterized by (device, matrix type)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int type;
+
+    cv::Size size;
+    cv::Mat orig; // input matrix; also the expected result of the round trip
+
+    virtual void SetUp() 
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // make the device under test the current one
+        
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); // random width and height, each drawn with rng.uniform(20, 150)
+
+        orig.create(size, type);
+        orig.setTo(cv::Scalar(1.0, 2.0, 3.0, 4.0)); // a different constant per channel
+    }
+};
+
+TEST_P(SplitMerge, Consistency) // gpu::split followed by gpu::merge must give back the original matrix
+{
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // 64F types are skipped when supportFeature() reports no NATIVE_DOUBLE; the test then ends without a failure
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+
+    cv::Mat final;
+
+    ASSERT_NO_THROW(
+        std::vector<cv::gpu::GpuMat> dev_vec;
+        cv::gpu::GpuMat dev_final;
+
+        cv::gpu::split(cv::gpu::GpuMat(orig), dev_vec);    // planes of orig on the device
+        cv::gpu::merge(dev_vec, dev_final);
+
+        dev_final.download(final);
+    );
+
+    EXPECT_MAT_NEAR(orig, final, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, SplitMerge, testing::Combine( // Combine yields every (device, type) pair from the two lists
+                        testing::ValuesIn(devices()), 
+                        testing::ValuesIn(all_types()))); // devices() and all_types() are defined outside this file - TODO confirm what they enumerate
+
+////////////////////////////////////////////////////////////////////////////////
+// setTo
+
+struct SetTo : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // fixture for GpuMat::setTo, parameterized by (device, matrix type)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int type;
+
+    cv::Size size;
+    cv::Mat mat_gold; // host matrix; each test uploads it and then applies cv::Mat::setTo to it as the reference
+
+    virtual void SetUp() 
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // make the device under test the current one
+        
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); // random width and height, each drawn with rng.uniform(20, 150)
+
+        mat_gold.create(size, type); // allocated only; nothing in SetUp writes the elements
+    }
+};
+
+TEST_P(SetTo, Zero) // GpuMat::setTo with an all-zero scalar must match cv::Mat::setTo
+{
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // 64F types are skipped when supportFeature() reports no NATIVE_DOUBLE; the test then ends without a failure
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+
+    static cv::Scalar zero = cv::Scalar::all(0);
+
+    cv::Mat mat;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_mat(mat_gold);
+
+        mat_gold.setTo(zero); // reference on the host
+        dev_mat.setTo(zero);  // same operation on the device
+
+        dev_mat.download(mat);
+    );
+
+    EXPECT_MAT_NEAR(mat_gold, mat, 0.0);
+}
+
+TEST_P(SetTo, SameVal) // GpuMat::setTo with the same value in every scalar component must match cv::Mat::setTo
+{
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // 64F types are skipped when supportFeature() reports no NATIVE_DOUBLE; the test then ends without a failure
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+
+    static cv::Scalar s = cv::Scalar::all(1);
+
+    cv::Mat mat;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_mat(mat_gold);
+
+        mat_gold.setTo(s); // reference on the host
+        dev_mat.setTo(s);  // same operation on the device
+
+        dev_mat.download(mat);
+    );
+
+    EXPECT_MAT_NEAR(mat_gold, mat, 0.0);
+}
+
+TEST_P(SetTo, DifferentVal) // GpuMat::setTo with a distinct value per scalar component must match cv::Mat::setTo
+{
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // 64F types are skipped when supportFeature() reports no NATIVE_DOUBLE; the test then ends without a failure
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+
+    static cv::Scalar s = cv::Scalar(1, 2, 3, 4);
+
+    cv::Mat mat;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_mat(mat_gold);
+
+        mat_gold.setTo(s); // reference on the host
+        dev_mat.setTo(s);  // same operation on the device
+
+        dev_mat.download(mat);
+    );
+
+    EXPECT_MAT_NEAR(mat_gold, mat, 0.0);
+}
+
+TEST_P(SetTo, Masked) // GpuMat::setTo restricted by a random mask must match masked cv::Mat::setTo
+{
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // 64F types are skipped when supportFeature() reports no NATIVE_DOUBLE; the test then ends without a failure
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+
+    static cv::Scalar s = cv::Scalar(1, 2, 3, 4);
+
+    cv::Mat mat;
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+    cv::Mat mask = cvtest::randomMat(rng, mat_gold.size(), CV_8UC1, 0.0, 1.5, false); // sized from mat_gold: `mat` is still empty here, so mat.size() gave an empty mask
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_mat(mat_gold); // uploaded before setTo: mat_gold is only create()d in SetUp, so both sides must start from the same bytes
+
+        mat_gold.setTo(s, mask);
+        dev_mat.setTo(s, cv::gpu::GpuMat(mask));
+
+        dev_mat.download(mat);
+    );
+
+    EXPECT_MAT_NEAR(mat_gold, mat, 0.0); // NOTE(review): unmasked elements keep unwritten memory; if the comparison is NaN-sensitive for float types, fill mat_gold first - confirm
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, SetTo, testing::Combine( // Combine yields every (device, type) pair from the two lists
+                        testing::ValuesIn(devices()), 
+                        testing::ValuesIn(all_types()))); // devices() and all_types() are defined outside this file - TODO confirm what they enumerate
+
+////////////////////////////////////////////////////////////////////////////////
+// copyTo
+
+struct CopyTo : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > // fixture for GpuMat::copyTo, parameterized by (device, matrix type)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int type;
+
+    cv::Size size;
+    cv::Mat src; // random source matrix shared by both copyTo tests
+
+    virtual void SetUp() 
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        type = std::tr1::get<1>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // make the device under test the current one
+        
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); // random width and height, each drawn with rng.uniform(20, 150)
+
+        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false); // NOTE(review): 0.0 and 127.0 are presumably the value bounds - confirm against cvtest::randomMat
+    }
+};
+
+TEST_P(CopyTo, WithoutMask) // unmasked GpuMat::copyTo must match cv::Mat::copyTo
+{
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // 64F types are skipped when supportFeature() reports no NATIVE_DOUBLE; the test then ends without a failure
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+
+    cv::Mat dst_gold;
+    src.copyTo(dst_gold); // reference copy on the host
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_src(src);
+
+        cv::gpu::GpuMat dev_dst;
+
+        dev_src.copyTo(dev_dst);
+
+        dev_dst.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+TEST_P(CopyTo, Masked) // GpuMat::copyTo with a random mask must match masked cv::Mat::copyTo
+{
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // 64F types are skipped when supportFeature() reports no NATIVE_DOUBLE; the test then ends without a failure
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(type);
+    PRINT_PARAM(size);
+
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+    cv::Mat mask = cvtest::randomMat(rng, src.size(), CV_8UC1, 0.0, 1.5, false); // 8-bit single-channel mask with the same size as src
+
+    cv::Mat dst_gold;
+    src.copyTo(dst_gold, mask); // reference copy on the host
+
+    cv::Mat dst;
+
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_src(src);
+
+        cv::gpu::GpuMat dev_dst;
+
+        dev_src.copyTo(dev_dst, cv::gpu::GpuMat(mask));
+
+        dev_dst.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0); // NOTE(review): both destinations start empty, so this assumes host and device copyTo treat unmasked elements alike - confirm
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, CopyTo, testing::Combine( // Combine yields every (device, type) pair from the two lists
+                        testing::ValuesIn(devices()), 
+                        testing::ValuesIn(all_types()))); // devices() and all_types() are defined outside this file - TODO confirm what they enumerate
+
+////////////////////////////////////////////////////////////////////////////////
+// convertTo
+
+struct ConvertTo : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> > // fixture for GpuMat::convertTo, parameterized by (device, source depth, destination depth)
+{
+    cv::gpu::DeviceInfo devInfo;
+    int depth1; // type of the source matrix
+    int depth2; // type requested from convertTo
+
+    cv::Size size;
+    cv::Mat src;
+
+    virtual void SetUp() 
+    {
+        devInfo = std::tr1::get<0>(GetParam());
+        depth1 = std::tr1::get<1>(GetParam());
+        depth2 = std::tr1::get<2>(GetParam());
+
+        cv::gpu::setDevice(devInfo.deviceID()); // make the device under test the current one
+        
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); // random width and height, each drawn with rng.uniform(20, 150)
+
+        src = cvtest::randomMat(rng, size, depth1, 0.0, 127.0, false); // NOTE(review): 0.0 and 127.0 are presumably the value bounds - confirm against cvtest::randomMat
+    }
+};
+
+TEST_P(ConvertTo, WithoutScaling) // GpuMat::convertTo without scale/shift must match cv::Mat::convertTo
+{
+    if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // skipped when either side is 64F and supportFeature() reports no NATIVE_DOUBLE; the test then ends without a failure
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(depth1);
+    PRINT_TYPE(depth2);
+    PRINT_PARAM(size);
+
+    cv::Mat dst_gold;
+    src.convertTo(dst_gold, depth2); // reference conversion on the host
+
+    cv::Mat dst;
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_src(src);
+
+        cv::gpu::GpuMat dev_dst;
+
+        dev_src.convertTo(dev_dst, depth2);
+
+        dev_dst.download(dst);
+    );
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+}
+
+TEST_P(ConvertTo, WithScaling) // GpuMat::convertTo with a random scale and shift must match cv::Mat::convertTo within eps
+{
+    if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+        return; // skipped when either side is 64F and supportFeature() reports no NATIVE_DOUBLE; the test then ends without a failure
+
+    PRINT_PARAM(devInfo);
+    PRINT_TYPE(depth1);
+    PRINT_TYPE(depth2);
+    PRINT_PARAM(size);
+    
+    cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+    const double a = rng.uniform(0.0, 1.0);    // scale passed to convertTo
+    const double b = rng.uniform(-10.0, 10.0); // shift passed to convertTo
+    
+    PRINT_PARAM(a);
+    PRINT_PARAM(b);
+
+    cv::Mat dst_gold;
+    src.convertTo(dst_gold, depth2, a, b); // reference conversion on the host
+
+    cv::Mat dst;
+    
+    ASSERT_NO_THROW(
+        cv::gpu::GpuMat dev_src(src);
+
+        cv::gpu::GpuMat dev_dst;
+
+        dev_src.convertTo(dev_dst, depth2, a, b);
+
+        dev_dst.download(dst);
+    );
+
+    const double eps = depth2 < CV_32F ? 1 : 1e-4; // depths numbered below CV_32F get a tolerance of 1, all others 1e-4
+
+    EXPECT_MAT_NEAR(dst_gold, dst, eps);
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, ConvertTo, testing::Combine( // Combine yields every (device, source, destination) triple from the three lists
+                        testing::ValuesIn(devices()), 
+                        testing::ValuesIn(types(CV_8U, CV_64F, 1, 1)), 
+                        testing::ValuesIn(types(CV_8U, CV_64F, 1, 1)))); // NOTE(review): types(CV_8U, CV_64F, 1, 1) presumably lists depths CV_8U..CV_64F with one channel - confirm against the helper
+
+////////////////////////////////////////////////////////////////////////////////
+// async
+
+struct Async : testing::TestWithParam<cv::gpu::DeviceInfo> // fixture for the two-stream test, parameterized by device only
+{
+    cv::gpu::DeviceInfo devInfo;
+
+    cv::gpu::CudaMem src;
+
+    cv::Mat dst_gold0; // expected output of the first stream (all 255)
+    cv::Mat dst_gold1; // expected output of the second stream (all 128)
+
+    virtual void SetUp() 
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID()); // make the device under test the current one
+
+        cv::RNG& rng = cvtest::TS::ptr()->get_rng();
+
+        int rows = rng.uniform(100, 200);
+        int cols = rng.uniform(100, 200);
+
+        src = cv::gpu::CudaMem(cv::Mat::zeros(rows, cols, CV_8UC1));        // all-zero 8-bit source
+
+        dst_gold0 = cv::Mat(rows, cols, CV_8UC1, cv::Scalar::all(255)); // bitwise NOT of an all-zero 8-bit image
+        dst_gold1 = cv::Mat(rows, cols, CV_8UC1, cv::Scalar::all(128)); // the value the test writes with enqueueMemSet
+    }
+};
+
+TEST_P(Async, Accuracy) // queues independent work on two streams and checks both downloaded results
+{
+    PRINT_PARAM(devInfo);
+
+    cv::Mat dst0, dst1;
+    
+    ASSERT_NO_THROW(
+        cv::gpu::CudaMem cpudst0;
+        cv::gpu::CudaMem cpudst1;
+
+        cv::gpu::GpuMat gpusrc;
+        cv::gpu::GpuMat gpudst0;
+        cv::gpu::GpuMat gpudst1(src.rows, src.cols, CV_8UC1);
+
+        cv::gpu::Stream stream0;
+        cv::gpu::Stream stream1;
+
+        stream0.enqueueUpload(src, gpusrc); // stream0: upload, invert, download
+        cv::gpu::bitwise_not(gpusrc, gpudst0, cv::gpu::GpuMat(), stream0);
+        stream0.enqueueDownload(gpudst0, cpudst0);
+
+        stream1.enqueueMemSet(gpudst1, cv::Scalar::all(128)); // stream1: fill with 128, download
+        stream1.enqueueDownload(gpudst1, cpudst1);
+
+        stream0.waitForCompletion(); // both streams must finish before the host buffers are read
+        stream1.waitForCompletion();
+
+        dst0 = cpudst0.createMatHeader().clone(); // deep copy: cpudst0 is local to this statement and is gone before the checks below run
+        dst1 = cpudst1.createMatHeader().clone(); // same for cpudst1
+    );
+
+    EXPECT_MAT_NEAR(dst_gold0, dst0, 0.0);
+    EXPECT_MAT_NEAR(dst_gold1, dst1, 0.0);
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, Async, testing::ValuesIn(devices())); // one instance per entry of devices(), a helper defined outside this file - TODO confirm what it enumerates
+
+#endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_meanshift.cpp b/modules/gpu/test/test_meanshift.cpp
deleted file mode 100644
index b297b982ac..0000000000
--- a/modules/gpu/test/test_meanshift.cpp
+++ /dev/null
@@ -1,233 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-#include <iostream>
-#include <string>
-
-using namespace cv;
-using namespace cv::gpu;
-
-
-struct CV_GpuMeanShiftTest : public cvtest::BaseTest
-{
-    CV_GpuMeanShiftTest() {}
-
-    void run(int)
-    {
-        bool cc12_ok = TargetArchs::builtWith(FEATURE_SET_COMPUTE_12) && DeviceInfo().supports(FEATURE_SET_COMPUTE_12);
-        if (!cc12_ok)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "\nCompute capability 1.2 is required");
-            ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-            return;
-        }
-
-        int spatialRad = 30;
-        int colorRad = 30;
-
-        cv::Mat img = cv::imread(std::string(ts->get_data_path()) + "meanshift/cones.png");
-        cv::Mat img_template;       
-        
-        if (cv::gpu::TargetArchs::builtWith(cv::gpu::FEATURE_SET_COMPUTE_20) &&
-            cv::gpu::DeviceInfo().supports(cv::gpu::FEATURE_SET_COMPUTE_20))
-            img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result.png");
-        else
-            img_template = cv::imread(std::string(ts->get_data_path()) + "meanshift/con_result_CC1X.png");
-
-        if (img.empty() || img_template.empty())
-        {
-            ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-            return;
-        }
-
-        cv::Mat rgba;
-        cvtColor(img, rgba, CV_BGR2BGRA);
-
-
-        cv::gpu::GpuMat res;
-        cv::gpu::meanShiftFiltering( cv::gpu::GpuMat(rgba), res, spatialRad, colorRad );
-
-        if (res.type() != CV_8UC4)
-        {
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return;
-        }
-
-        cv::Mat result;
-        res.download(result);
-
-        uchar maxDiff = 0;
-        for (int j = 0; j < result.rows; ++j)
-        {
-            const uchar* res_line = result.ptr<uchar>(j);
-            const uchar* ref_line = img_template.ptr<uchar>(j);
-
-            for (int i = 0; i < result.cols; ++i)
-            {
-                for (int k = 0; k < 3; ++k)
-                {
-                    const uchar& ch1 = res_line[result.channels()*i + k];
-                    const uchar& ch2 = ref_line[img_template.channels()*i + k];
-                    uchar diff = static_cast<uchar>(abs(ch1 - ch2));
-                    if (maxDiff < diff)
-                        maxDiff = diff;
-                }
-            }
-        }
-        if (maxDiff > 0)
-        {
-            ts->printf(cvtest::TS::LOG, "\nMeanShift maxDiff = %d\n", maxDiff);
-            ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-            return;
-        }
-
-        ts->set_failed_test_info(cvtest::TS::OK);
-    }
-
-};
-
-TEST(meanShift, accuracy) { CV_GpuMeanShiftTest test; test.safe_run(); }
-
-struct CV_GpuMeanShiftProcTest : public cvtest::BaseTest
-{
-    CV_GpuMeanShiftProcTest() {}
-
-    void run(int)
-    {
-        bool cc12_ok = TargetArchs::builtWith(FEATURE_SET_COMPUTE_12) && DeviceInfo().supports(FEATURE_SET_COMPUTE_12);
-        if (!cc12_ok)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "\nCompute capability 1.2 is required");
-            ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-            return;
-        }
-
-        int spatialRad = 30;
-        int colorRad = 30;
-
-        cv::Mat img = cv::imread(std::string(ts->get_data_path()) + "meanshift/cones.png");
-
-        if (img.empty())
-        {
-            ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-            return;
-        }
-
-        cv::Mat rgba;
-        cvtColor(img, rgba, CV_BGR2BGRA);
-
-        cv::gpu::GpuMat h_rmap_filtered;
-        cv::gpu::meanShiftFiltering( cv::gpu::GpuMat(rgba), h_rmap_filtered, spatialRad, colorRad );
-
-        cv::gpu::GpuMat d_rmap;
-        cv::gpu::GpuMat d_spmap;
-        cv::gpu::meanShiftProc( cv::gpu::GpuMat(rgba), d_rmap, d_spmap, spatialRad, colorRad );
-
-        if (d_rmap.type() != CV_8UC4)
-        {
-            ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            return;
-        }
-
-        cv::Mat rmap_filtered;
-        h_rmap_filtered.download(rmap_filtered);
-
-        cv::Mat rmap;
-        d_rmap.download(rmap);
-
-        uchar maxDiff = 0;
-        for (int j = 0; j < rmap_filtered.rows; ++j)
-        {
-            const uchar* res_line = rmap_filtered.ptr<uchar>(j);
-            const uchar* ref_line = rmap.ptr<uchar>(j);
-
-            for (int i = 0; i < rmap_filtered.cols; ++i)
-            {
-                for (int k = 0; k < 3; ++k)
-                {
-                    const uchar& ch1 = res_line[rmap_filtered.channels()*i + k];
-                    const uchar& ch2 = ref_line[rmap.channels()*i + k];
-                    uchar diff = static_cast<uchar>(abs(ch1 - ch2));
-                    if (maxDiff < diff)
-                        maxDiff = diff;
-                }
-            }
-        }
-        if (maxDiff > 0)
-        {
-            ts->printf(cvtest::TS::LOG, "\nMeanShiftProc maxDiff = %d\n", maxDiff);
-            ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-            return;
-        }
-
-        cv::Mat spmap;
-        d_spmap.download(spmap);
-
-        cv::Mat spmap_template;
-        cv::FileStorage fs;
-
-        if (cv::gpu::TargetArchs::builtWith(cv::gpu::FEATURE_SET_COMPUTE_20) &&
-            cv::gpu::DeviceInfo().supports(cv::gpu::FEATURE_SET_COMPUTE_20))
-            fs.open(std::string(ts->get_data_path()) + "meanshift/spmap.yaml", cv::FileStorage::READ);
-        else
-            fs.open(std::string(ts->get_data_path()) + "meanshift/spmap_CC1X.yaml", cv::FileStorage::READ);
-        fs["spmap"] >> spmap_template;
-
-        for (int y = 0; y < spmap.rows; ++y) {
-            for (int x = 0; x < spmap.cols; ++x) {
-                cv::Point_<short> expected = spmap_template.at<cv::Point_<short> >(y, x);
-                cv::Point_<short> actual = spmap.at<cv::Point_<short> >(y, x);
-                int diff = (expected - actual).dot(expected - actual);
-                if (actual != expected) {
-                    ts->printf(cvtest::TS::LOG, "\nMeanShiftProc SpMap is bad, diff=%d\n", diff);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-                    return;
-                }
-            }
-        }
-
-        ts->set_failed_test_info(cvtest::TS::OK);
-    }
-
-};
-
-TEST(meanShiftProc, accuracy) { CV_GpuMeanShiftProcTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_mssegmentation.cpp b/modules/gpu/test/test_mssegmentation.cpp
deleted file mode 100644
index b64914704b..0000000000
--- a/modules/gpu/test/test_mssegmentation.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include <iostream>
-#include <string>
-#include <iosfwd>
-#include "test_precomp.hpp"
-using namespace cv;
-using namespace cv::gpu;
-using namespace std;
-
-struct CV_GpuMeanShiftSegmentationTest : public cvtest::BaseTest {
-    CV_GpuMeanShiftSegmentationTest() {}
-
-    void run(int) 
-    {
-        bool cc12_ok = TargetArchs::builtWith(FEATURE_SET_COMPUTE_12) && DeviceInfo().supports(FEATURE_SET_COMPUTE_12);
-        if (!cc12_ok)
-        {
-            ts->printf(cvtest::TS::CONSOLE, "\nCompute capability 1.2 is required");
-            ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-            return;
-        }
-
-        Mat img_rgb = imread(string(ts->get_data_path()) + "meanshift/cones.png");
-        if (img_rgb.empty())
-        {
-            ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-            return;
-        }
-
-        Mat img;
-        cvtColor(img_rgb, img, CV_BGR2BGRA);
-
-
-        for (int minsize = 0; minsize < 2000; minsize = (minsize + 1) * 4)
-        {
-            stringstream path;
-            path << ts->get_data_path() << "meanshift/cones_segmented_sp10_sr10_minsize" << minsize;
-            if (TargetArchs::builtWith(FEATURE_SET_COMPUTE_20) && DeviceInfo().supports(FEATURE_SET_COMPUTE_20))
-                path << ".png";
-            else
-                path << "_CC1X.png";
-
-            Mat dst;
-            meanShiftSegmentation((GpuMat)img, dst, 10, 10, minsize);
-            Mat dst_rgb;
-            cvtColor(dst, dst_rgb, CV_BGRA2BGR);
-
-            //imwrite(path.str(), dst_rgb);
-            Mat dst_ref = imread(path.str());
-            if (dst_ref.empty())
-            {
-                ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-                return;
-            }
-            if (CheckSimilarity(dst_rgb, dst_ref, 1e-3f) != cvtest::TS::OK)
-            {
-                ts->printf(cvtest::TS::LOG, "\ndiffers from image *minsize%d.png\n", minsize);
-                ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY);
-            }
-        }
-
-        ts->set_failed_test_info(cvtest::TS::OK);
-    }    
-
-    int CheckSimilarity(const Mat& m1, const Mat& m2, float max_err)
-    {
-        Mat diff;
-        cv::matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED);
-
-        float err = abs(diff.at<float>(0, 0) - 1.f);
-
-        if (err > max_err)
-            return cvtest::TS::FAIL_INVALID_OUTPUT;
-
-        return cvtest::TS::OK;
-    }
-
-
-};
-
-
-TEST(meanShiftSegmentation, regression) { CV_GpuMeanShiftSegmentationTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_nvidia.cpp b/modules/gpu/test/test_nvidia.cpp
index 412e5fcaa8..2620dd7f48 100644
--- a/modules/gpu/test/test_nvidia.cpp
+++ b/modules/gpu/test/test_nvidia.cpp
@@ -40,35 +40,198 @@
 //M*/
 
 #include "test_precomp.hpp"
-#include "cvconfig.h"
 
-class CV_NVidiaTestsCaller : public cvtest::BaseTest
+#ifdef HAVE_CUDA
+
+enum OutputLevel // output selector passed as the second argument to every nvidia_* entry point declared below
+{
+    OutputLevelNone, // the value nvidiaTestOutputLevel is initialised with in this file
+    OutputLevelCompact,
+    OutputLevelFull
+};
+// Entry points of the individual NVIDIA tests (declared only). Each takes the test data directory and an OutputLevel; the TEST_P bodies below assert that the returned bool is true.
+bool nvidia_NPPST_Integral_Image(const std::string& test_data_path, OutputLevel outputLevel);
+bool nvidia_NPPST_Squared_Integral_Image(const std::string& test_data_path, OutputLevel outputLevel);
+bool nvidia_NPPST_RectStdDev(const std::string& test_data_path, OutputLevel outputLevel);
+bool nvidia_NPPST_Resize(const std::string& test_data_path, OutputLevel outputLevel);
+bool nvidia_NPPST_Vector_Operations(const std::string& test_data_path, OutputLevel outputLevel);
+bool nvidia_NPPST_Transpose(const std::string& test_data_path, OutputLevel outputLevel);
+bool nvidia_NCV_Vector_Operations(const std::string& test_data_path, OutputLevel outputLevel);
+bool nvidia_NCV_Haar_Cascade_Loader(const std::string& test_data_path, OutputLevel outputLevel);
+bool nvidia_NCV_Haar_Cascade_Application(const std::string& test_data_path, OutputLevel outputLevel);
+bool nvidia_NCV_Hypotheses_Filtration(const std::string& test_data_path, OutputLevel outputLevel);
+bool nvidia_NCV_Visualization(const std::string& test_data_path, OutputLevel outputLevel);
+
+struct NVidiaTest : testing::TestWithParam<cv::gpu::DeviceInfo>
 {
-public:
-    CV_NVidiaTestsCaller() {}
-    virtual ~CV_NVidiaTestsCaller() {}
-
-protected:
-    
-	void run( int )
-	{   
-		;
-
-#if defined(HAVE_CUDA)
-		bool main_nvidia(const std::string&);
-
-		// Invoke all NVIDIA Staging tests and obtain the result
-		bool passed = main_nvidia(std::string(ts->get_data_path()) + "haarcascade/");
-
-		if (passed)
-		    ts->set_failed_test_info(cvtest::TS::OK);
-		else
-		    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-
-#else
-		ts->set_failed_test_info(cvtest::TS::SKIPPED);
-#endif
-	}   
+    static std::string path; // test data directory shared by all instances; assigned in SetUpTestCase()
+
+    cv::gpu::DeviceInfo devInfo; // copy of the test parameter, set in SetUp()
+
+    static void SetUpTestCase() // builds the shared data path once for the whole test case
+    {
+        path = std::string(cvtest::TS::ptr()->get_data_path()) + "haarcascade/"; // test-system data path plus the "haarcascade/" subdirectory
+    }
+
+    virtual void SetUp() // stores the parameter and makes its device current before each test body runs
+    {
+        devInfo = GetParam();
+
+        cv::gpu::setDevice(devInfo.deviceID());
+    }
 };
 
-TEST(NVidia, multitest) { CV_NVidiaTestsCaller test; test.safe_run(); }
+std::string NVidiaTest::path; // out-of-class definition of the static member
+// Empty fixtures: they give the NPPST and NCV tests separate test-case names on top of the same NVidiaTest setup.
+struct NPPST : NVidiaTest {};
+struct NCV : NVidiaTest {};
+// Output level that every TEST_P in this file passes to its nvidia_* entry point.
+OutputLevel nvidiaTestOutputLevel = OutputLevelNone;
+
+TEST_P(NPPST, Integral) // nvidia_NPPST_Integral_Image must not throw and must return true
+{
+    PRINT_PARAM(devInfo);
+
+    bool res; // assigned inside ASSERT_NO_THROW; a throw fails the test before res is read
+
+    ASSERT_NO_THROW(
+        res = nvidia_NPPST_Integral_Image(path, nvidiaTestOutputLevel);
+    );
+
+    ASSERT_TRUE(res);
+}
+
+TEST_P(NPPST, SquaredIntegral) // nvidia_NPPST_Squared_Integral_Image must not throw and must return true
+{
+    PRINT_PARAM(devInfo);
+
+    bool res; // assigned inside ASSERT_NO_THROW; a throw fails the test before res is read
+
+    ASSERT_NO_THROW(
+        res = nvidia_NPPST_Squared_Integral_Image(path, nvidiaTestOutputLevel);
+    );
+
+    ASSERT_TRUE(res);
+}
+
+TEST_P(NPPST, RectStdDev) // nvidia_NPPST_RectStdDev must not throw and must return true
+{
+    PRINT_PARAM(devInfo);
+
+    bool res; // assigned inside ASSERT_NO_THROW; a throw fails the test before res is read
+
+    ASSERT_NO_THROW(
+        res = nvidia_NPPST_RectStdDev(path, nvidiaTestOutputLevel);
+    );
+
+    ASSERT_TRUE(res);
+}
+
+TEST_P(NPPST, Resize) // nvidia_NPPST_Resize must not throw and must return true
+{
+    PRINT_PARAM(devInfo);
+
+    bool res; // assigned inside ASSERT_NO_THROW; a throw fails the test before res is read
+
+    ASSERT_NO_THROW(
+        res = nvidia_NPPST_Resize(path, nvidiaTestOutputLevel);
+    );
+
+    ASSERT_TRUE(res);
+}
+
+TEST_P(NPPST, VectorOperations) // nvidia_NPPST_Vector_Operations must not throw and must return true
+{
+    PRINT_PARAM(devInfo);
+
+    bool res; // assigned inside ASSERT_NO_THROW; a throw fails the test before res is read
+
+    ASSERT_NO_THROW(
+        res = nvidia_NPPST_Vector_Operations(path, nvidiaTestOutputLevel);
+    );
+
+    ASSERT_TRUE(res);
+}
+
+TEST_P(NPPST, Transpose) // nvidia_NPPST_Transpose must not throw and must return true
+{
+    PRINT_PARAM(devInfo);
+
+    bool res; // assigned inside ASSERT_NO_THROW; a throw fails the test before res is read
+
+    ASSERT_NO_THROW(
+        res = nvidia_NPPST_Transpose(path, nvidiaTestOutputLevel);
+    );
+
+    ASSERT_TRUE(res);
+}
+
+TEST_P(NCV, VectorOperations) // nvidia_NCV_Vector_Operations must not throw and must return true
+{
+    PRINT_PARAM(devInfo);
+
+    bool res; // assigned inside ASSERT_NO_THROW; a throw fails the test before res is read
+
+    ASSERT_NO_THROW(
+        res = nvidia_NCV_Vector_Operations(path, nvidiaTestOutputLevel);
+    );
+
+    ASSERT_TRUE(res);
+}
+
+TEST_P(NCV, HaarCascadeLoader) // nvidia_NCV_Haar_Cascade_Loader must not throw and must return true
+{
+    PRINT_PARAM(devInfo);
+
+    bool res; // assigned inside ASSERT_NO_THROW; a throw fails the test before res is read
+
+    ASSERT_NO_THROW(
+        res = nvidia_NCV_Haar_Cascade_Loader(path, nvidiaTestOutputLevel);
+    );
+
+    ASSERT_TRUE(res);
+}
+
+TEST_P(NCV, HaarCascadeApplication) // nvidia_NCV_Haar_Cascade_Application must not throw and must return true
+{
+    PRINT_PARAM(devInfo);
+
+    bool res; // assigned inside ASSERT_NO_THROW; a throw fails the test before res is read
+
+    ASSERT_NO_THROW(
+        res = nvidia_NCV_Haar_Cascade_Application(path, nvidiaTestOutputLevel);
+    );
+
+    ASSERT_TRUE(res);
+}
+
+TEST_P(NCV, HypothesesFiltration) // nvidia_NCV_Hypotheses_Filtration must not throw and must return true
+{
+    PRINT_PARAM(devInfo);
+
+    bool res; // assigned inside ASSERT_NO_THROW; a throw fails the test before res is read
+
+    ASSERT_NO_THROW(
+        res = nvidia_NCV_Hypotheses_Filtration(path, nvidiaTestOutputLevel);
+    );
+
+    ASSERT_TRUE(res);
+}
+
+TEST_P(NCV, Visualization) // nvidia_NCV_Visualization must not throw and must return true
+{
+    PRINT_PARAM(devInfo);
+
+    bool res; // assigned inside ASSERT_NO_THROW; a throw fails the test before res is read
+
+    ASSERT_NO_THROW(
+        res = nvidia_NCV_Visualization(path, nvidiaTestOutputLevel);
+    );
+
+    ASSERT_TRUE(res);
+}
+
+INSTANTIATE_TEST_CASE_P(NVidia, NPPST, testing::ValuesIn(devices())); // one instance of every NPPST test per entry returned by devices()
+INSTANTIATE_TEST_CASE_P(NVidia, NCV, testing::ValuesIn(devices())); // same for the NCV tests
+
+#endif // HAVE_CUDA
diff --git a/modules/gpu/test/test_operator_async_call.cpp b/modules/gpu/test/test_operator_async_call.cpp
deleted file mode 100644
index 34843c79de..0000000000
--- a/modules/gpu/test/test_operator_async_call.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-using namespace std;
-using namespace cv;
-using namespace cv::gpu;
-
-struct CV_AsyncGpuMatTest : public cvtest::BaseTest
-{
-    CV_AsyncGpuMatTest() {}
-
-    void run(int)
-    {
-        CudaMem src(Mat::zeros(100, 100, CV_8UC1));
-
-        GpuMat gpusrc;
-        GpuMat gpudst0, gpudst1(100, 100, CV_8UC1);
-
-        CudaMem cpudst0;
-        CudaMem cpudst1;
-
-        Stream stream0, stream1;
-
-        stream0.enqueueUpload(src, gpusrc);
-        bitwise_not(gpusrc, gpudst0, GpuMat(), stream0);
-        stream0.enqueueDownload(gpudst0, cpudst0);
-
-        stream1.enqueueMemSet(gpudst1, Scalar::all(128));
-        stream1.enqueueDownload(gpudst1, cpudst1);
-
-        stream0.waitForCompletion();
-        stream1.waitForCompletion();
-
-        Mat cpu_gold0(100, 100, CV_8UC1, Scalar::all(255));
-        Mat cpu_gold1(100, 100, CV_8UC1, Scalar::all(128));
-
-        if (norm((Mat)cpudst0, cpu_gold0, NORM_INF) > 0 || norm((Mat)cpudst1, cpu_gold1, NORM_INF) > 0)
-            ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-        else
-            ts->set_failed_test_info(cvtest::TS::OK);
-    }
-};
-
-TEST(GpuMat, async) { CV_AsyncGpuMatTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_operator_convert_to.cpp b/modules/gpu/test/test_operator_convert_to.cpp
deleted file mode 100644
index c5c55b3d95..0000000000
--- a/modules/gpu/test/test_operator_convert_to.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-#include <fstream>
-#include <iterator>
-#include <numeric>
-
-using namespace cv;
-using namespace std;
-using namespace gpu;
-
-class CV_GpuMatOpConvertToTest : public cvtest::BaseTest
-{
-    public:
-        CV_GpuMatOpConvertToTest() {}
-        ~CV_GpuMatOpConvertToTest() {}
-
-    protected:
-        void run(int);
-};
-
-void CV_GpuMatOpConvertToTest::run(int /* start_from */)
-{
-    const Size img_size(67, 35);
-
-    const char* types_str[] = {"CV_8U", "CV_8S", "CV_16U", "CV_16S", "CV_32S", "CV_32F", "CV_64F"};
-
-    bool passed = true;
-    int lastType = CV_32F;
-
-    if (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))
-        lastType = CV_64F;
-
-    for (int i = 0; i <= lastType && passed; ++i)
-    {
-        for (int j = 0; j <= lastType && passed; ++j)
-        {
-            for (int c = 1; c < 5 && passed; ++c)
-            {
-                const int src_type = CV_MAKETYPE(i, c);
-                const int dst_type = j;
-
-                cv::RNG& rng = ts->get_rng();
-
-                Mat cpumatsrc(img_size, src_type);
-                rng.fill(cpumatsrc, RNG::UNIFORM, Scalar::all(0), Scalar::all(300));
-
-                GpuMat gpumatsrc(cpumatsrc);
-                Mat cpumatdst;
-                GpuMat gpumatdst;
-
-                cpumatsrc.convertTo(cpumatdst, dst_type, 0.5, 3.0);
-                gpumatsrc.convertTo(gpumatdst, dst_type, 0.5, 3.0);
-
-                double r = norm(cpumatdst, (Mat)gpumatdst, NORM_INF);
-                if (r > 1)
-                {
-                    ts->printf(cvtest::TS::LOG,
-                               "\nFAILED: SRC_TYPE=%sC%d DST_TYPE=%s NORM = %f\n",
-                               types_str[i], c, types_str[j], r);
-                    passed = false;
-                }
-            }
-        }
-    }
-
-    ts->set_failed_test_info(passed ? cvtest::TS::OK : cvtest::TS::FAIL_GENERIC);
-}
-
-TEST(GpuMat_convertTo, accuracy) { CV_GpuMatOpConvertToTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_operator_copy_to.cpp b/modules/gpu/test/test_operator_copy_to.cpp
deleted file mode 100644
index dc1e2280bd..0000000000
--- a/modules/gpu/test/test_operator_copy_to.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-#include <fstream>
-#include <iterator>
-#include <numeric>
-#include <iomanip> // for  cout << setw()
-
-using namespace cv;
-using namespace std;
-using namespace gpu;
-
-class CV_GpuMatOpCopyToTest : public cvtest::BaseTest
-{
-    public:
-        CV_GpuMatOpCopyToTest()
-        {
-            rows = 234;
-            cols = 123;
-        }
-        ~CV_GpuMatOpCopyToTest() {}
-
-    protected:
-        void run(int);
-        template <typename T>
-        void print_mat(const T & mat, const std::string & name) const;
-        bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat);
-
-    private:
-        int rows;
-        int cols;
-};
-
-template<typename T>
-void CV_GpuMatOpCopyToTest::print_mat(const T & mat, const std::string & name) const { cv::imshow(name, mat); }
-
-bool CV_GpuMatOpCopyToTest::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
-{
-    Mat cmat(cpumat.size(), cpumat.type(), Scalar::all(0));
-    GpuMat gmat(cmat);
-
-    Mat cpumask(cpumat.size(), CV_8U);
-
-    cv::RNG& rng = ts->get_rng();
-
-    rng.fill(cpumask, RNG::NORMAL, Scalar::all(0), Scalar::all(127));
-
-    threshold(cpumask, cpumask, 0, 127, THRESH_BINARY);
-
-    GpuMat gpumask(cpumask);
-
-    //int64 time = getTickCount();
-    cpumat.copyTo(cmat, cpumask);
-    //int64 time1 = getTickCount();
-    gpumat.copyTo(gmat, gpumask);
-    //int64 time2 = getTickCount();
-
-    //std::cout << "\ntime cpu: " << std::fixed << std::setprecision(12) << 1.0 / double((time1 - time)  / (double)getTickFrequency());
-    //std::cout << "\ntime gpu: " << std::fixed << std::setprecision(12) << 1.0 / double((time2 - time1) / (double)getTickFrequency());
-    //std::cout << "\n";
-
-#ifdef PRINT_MATRIX
-    print_mat(cmat, "cpu mat");
-    print_mat(gmat, "gpu mat");
-    print_mat(cpumask, "cpu mask");
-    print_mat(gpumask, "gpu mask");
-    cv::waitKey(0);
-#endif
-
-    double ret = norm(cmat, (Mat)gmat);
-
-    if (ret < 1.0)
-        return true;
-    else
-    {
-        ts->printf(cvtest::TS::LOG, "\nNorm: %f\n", ret);
-        return false;
-    }
-}
-
-void CV_GpuMatOpCopyToTest::run( int /* start_from */)
-{
-    bool is_test_good = true;
-
-    int lastType = CV_32F;
-
-    if (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))
-        lastType = CV_64F;
-
-    for (int i = 0 ; i <= lastType; i++)
-    {
-        Mat cpumat(rows, cols, i);
-        cpumat.setTo(Scalar::all(127));
-
-        GpuMat gpumat(cpumat);
-
-        is_test_good &= compare_matrix(cpumat, gpumat);
-    }
-
-    if (is_test_good == true)
-        ts->set_failed_test_info(cvtest::TS::OK);
-    else
-        ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-}
-
-TEST(GpuMat_copyTo, accuracy) { CV_GpuMatOpCopyToTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_operator_set_to.cpp b/modules/gpu/test/test_operator_set_to.cpp
deleted file mode 100644
index f09f411714..0000000000
--- a/modules/gpu/test/test_operator_set_to.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-using namespace cv;
-using namespace std;
-using namespace gpu;
-
-class CV_GpuMatOpSetToTest : public cvtest::BaseTest
-{
-public:
-    CV_GpuMatOpSetToTest();
-    ~CV_GpuMatOpSetToTest() {}
-
-protected:
-    void run(int);
-
-    bool testSetTo(cv::Mat& cpumat, gpu::GpuMat& gpumat, const cv::Mat& cpumask = cv::Mat(), const cv::gpu::GpuMat& gpumask = cv::gpu::GpuMat());
-
-private:
-    int rows;
-    int cols;
-    Scalar s;
-};
-
-CV_GpuMatOpSetToTest::CV_GpuMatOpSetToTest()
-{
-    rows = 35;
-    cols = 67;
-
-    s.val[0] = 127.0;
-    s.val[1] = 127.0;
-    s.val[2] = 127.0;
-    s.val[3] = 127.0;
-}
-
-bool CV_GpuMatOpSetToTest::testSetTo(cv::Mat& cpumat, gpu::GpuMat& gpumat, const cv::Mat& cpumask, const cv::gpu::GpuMat& gpumask)
-{
-    cpumat.setTo(s, cpumask);
-    gpumat.setTo(s, gpumask);
-
-    double ret = norm(cpumat, (Mat)gpumat, NORM_INF);
-
-    if (ret < std::numeric_limits<double>::epsilon())
-        return true;
-    else
-    {
-        ts->printf(cvtest::TS::LOG, "\nNorm: %f\n", ret);
-        return false;
-    }
-}
-
-void CV_GpuMatOpSetToTest::run( int /* start_from */)
-{
-    bool is_test_good = true;
-
-    cv::Mat cpumask(rows, cols, CV_8UC1);
-    cv::RNG& rng = ts->get_rng();
-    rng.fill(cpumask, RNG::UNIFORM, cv::Scalar::all(0.0), cv::Scalar(1.5));
-    cv::gpu::GpuMat gpumask(cpumask);
-
-    int lastType = CV_32F;
-
-    if (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))
-        lastType = CV_64F;
-
-    for (int i = 0; i <= lastType; i++)
-    {
-        for (int cn = 1; cn <= 4; ++cn)
-        {
-            int mat_type = CV_MAKETYPE(i, cn);
-            Mat cpumat(rows, cols, mat_type, Scalar::all(0));
-            GpuMat gpumat(cpumat);
-            is_test_good &= testSetTo(cpumat, gpumat, cpumask, gpumask);
-        }
-    }
-
-    if (is_test_good == true)
-        ts->set_failed_test_info(cvtest::TS::OK);
-    else
-        ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-}
-
-TEST(GpuMat_setTo, accuracy) { CV_GpuMatOpSetToTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_precomp.cpp b/modules/gpu/test/test_precomp.cpp
index 26a8f3f678..34acf2ae91 100644
--- a/modules/gpu/test/test_precomp.cpp
+++ b/modules/gpu/test/test_precomp.cpp
@@ -1 +1,42 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
 #include "test_precomp.hpp"
diff --git a/modules/gpu/test/test_precomp.hpp b/modules/gpu/test/test_precomp.hpp
index f114994f7a..5ddafaab39 100644
--- a/modules/gpu/test/test_precomp.hpp
+++ b/modules/gpu/test/test_precomp.hpp
@@ -1,13 +1,62 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
 #ifndef __OPENCV_TEST_PRECOMP_HPP__
 #define __OPENCV_TEST_PRECOMP_HPP__
 
-#include <iostream>
+#include <cmath>
+#include <cstdio>
+#include <fstream>
+#include <sstream>
 #include <limits>
+#include <string>
+#include <algorithm>
+#include <iterator>
 #include "cvconfig.h"
 #include "opencv2/core/core.hpp"
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/calib3d/calib3d.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
 #include "opencv2/ts/ts.hpp"
 #include "opencv2/gpu/gpu.hpp"
+#include "test_gpu_base.hpp"
 
 #endif
diff --git a/modules/gpu/test/test_split_merge.cpp b/modules/gpu/test/test_split_merge.cpp
deleted file mode 100644
index ca5f4a9727..0000000000
--- a/modules/gpu/test/test_split_merge.cpp
+++ /dev/null
@@ -1,312 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other GpuMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or bpied warranties, including, but not limited to, the bpied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-using namespace std;
-using namespace cv;
-
-////////////////////////////////////////////////////////////////////////////////
-// Merge
-
-struct CV_MergeTest : public cvtest::BaseTest
-{
-    void can_merge(size_t rows, size_t cols);
-    void can_merge_submatrixes(size_t rows, size_t cols);
-    void run(int);
-};
-
-
-void CV_MergeTest::can_merge(size_t rows, size_t cols)
-{
-    bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && 
-                     gpu::DeviceInfo().supports(gpu::NATIVE_DOUBLE);
-    size_t depth_end = double_ok ? CV_64F : CV_32F;
-
-    for (size_t num_channels = 1; num_channels <= 4; ++num_channels)
-        for (size_t depth = CV_8U; depth <= depth_end; ++depth)
-        {
-            vector<Mat> src;
-            for (size_t i = 0; i < num_channels; ++i)
-                src.push_back(Mat(rows, cols, depth, Scalar::all(static_cast<double>(i))));
-            
-            Mat dst(rows, cols, CV_MAKETYPE(depth, num_channels));   
-
-            cv::merge(src, dst);   
-
-            vector<gpu::GpuMat> dev_src;
-            for (size_t i = 0; i < num_channels; ++i)
-                dev_src.push_back(gpu::GpuMat(src[i]));
-
-            gpu::GpuMat dev_dst(rows, cols, CV_MAKETYPE(depth, num_channels));
-            cv::gpu::merge(dev_src, dev_dst); 
-
-            Mat host_dst = dev_dst;
-
-            double err = norm(dst, host_dst, NORM_INF);
-
-            if (err > 1e-3)
-            {
-                //ts->printf(cvtest::TS::CONSOLE, "\nNorm: %f\n", err);
-                //ts->printf(cvtest::TS::CONSOLE, "Depth: %d\n", depth);
-                //ts->printf(cvtest::TS::CONSOLE, "Rows: %d\n", rows);
-                //ts->printf(cvtest::TS::CONSOLE, "Cols: %d\n", cols);
-                //ts->printf(cvtest::TS::CONSOLE, "NumChannels: %d\n", num_channels);
-                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                return;
-            }
-        }
-}
-
-
-void CV_MergeTest::can_merge_submatrixes(size_t rows, size_t cols)
-{
-    bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && 
-                     gpu::DeviceInfo().supports(gpu::NATIVE_DOUBLE);
-    size_t depth_end = double_ok ? CV_64F : CV_32F;
-
-    for (size_t num_channels = 1; num_channels <= 4; ++num_channels)
-        for (size_t depth = CV_8U; depth <= depth_end; ++depth)
-        {
-            vector<Mat> src;
-            for (size_t i = 0; i < num_channels; ++i) 
-            {
-                Mat m(rows * 2, cols * 2, depth, Scalar::all(static_cast<double>(i)));
-                src.push_back(m(Range(rows / 2, rows / 2 + rows), Range(cols / 2, cols / 2 + cols)));
-            }
-
-            Mat dst(rows, cols, CV_MAKETYPE(depth, num_channels));   
-
-            cv::merge(src, dst);   
-
-            vector<gpu::GpuMat> dev_src;
-            for (size_t i = 0; i < num_channels; ++i)
-                dev_src.push_back(gpu::GpuMat(src[i]));
-
-            gpu::GpuMat dev_dst(rows, cols, CV_MAKETYPE(depth, num_channels));
-            cv::gpu::merge(dev_src, dev_dst);
-
-            Mat host_dst = dev_dst;
-
-            double err = norm(dst, host_dst, NORM_INF);
-
-            if (err > 1e-3)
-            {
-                //ts->printf(cvtest::TS::CONSOLE, "\nNorm: %f\n", err);
-                //ts->printf(cvtest::TS::CONSOLE, "Depth: %d\n", depth);
-                //ts->printf(cvtest::TS::CONSOLE, "Rows: %d\n", rows);
-                //ts->printf(cvtest::TS::CONSOLE, "Cols: %d\n", cols);
-                //ts->printf(cvtest::TS::CONSOLE, "NumChannels: %d\n", num_channels);
-                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                return;
-            }
-        }
-}
-
-void CV_MergeTest::run(int) 
-{
-    can_merge(1, 1);
-    can_merge(1, 7);
-    can_merge(53, 7);
-    can_merge_submatrixes(1, 1);
-    can_merge_submatrixes(1, 7);
-    can_merge_submatrixes(53, 7);
-}
-
-
-////////////////////////////////////////////////////////////////////////////////
-// Split
-
-struct CV_SplitTest : public cvtest::BaseTest
-{
-    void can_split(size_t rows, size_t cols);    
-    void can_split_submatrix(size_t rows, size_t cols);
-    void run(int);
-};
-
-void CV_SplitTest::can_split(size_t rows, size_t cols)
-{
-    bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && 
-                     gpu::DeviceInfo().supports(gpu::NATIVE_DOUBLE);
-    size_t depth_end = double_ok ? CV_64F : CV_32F;
-
-    for (size_t num_channels = 1; num_channels <= 4; ++num_channels)
-        for (size_t depth = CV_8U; depth <= depth_end; ++depth)
-        {
-            Mat src(rows, cols, CV_MAKETYPE(depth, num_channels), Scalar(1.0, 2.0, 3.0, 4.0));   
-            vector<Mat> dst;
-            cv::split(src, dst);   
-
-            gpu::GpuMat dev_src(src);
-            vector<gpu::GpuMat> dev_dst;
-            cv::gpu::split(dev_src, dev_dst);
-
-            if (dev_dst.size() != dst.size())
-            {
-                ts->printf(cvtest::TS::CONSOLE, "Bad output sizes");
-                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            }
-
-            for (size_t i = 0; i < num_channels; ++i)
-            {
-                Mat host_dst = dev_dst[i];
-                double err = norm(dst[i], host_dst, NORM_INF);
-
-                if (err > 1e-3)
-                {
-                    //ts->printf(cvtest::TS::CONSOLE, "\nNorm: %f\n", err);
-                    //ts->printf(cvtest::TS::CONSOLE, "Depth: %d\n", depth);
-                    //ts->printf(cvtest::TS::CONSOLE, "Rows: %d\n", rows);
-                    //ts->printf(cvtest::TS::CONSOLE, "Cols: %d\n", cols);
-                    //ts->printf(cvtest::TS::CONSOLE, "NumChannels: %d\n", num_channels);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return;
-                }
-            }
-        }
-}
-
-void CV_SplitTest::can_split_submatrix(size_t rows, size_t cols)
-{
-    bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && 
-                     gpu::DeviceInfo().supports(gpu::NATIVE_DOUBLE);
-    size_t depth_end = double_ok ? CV_64F : CV_32F;
-
-    for (size_t num_channels = 1; num_channels <= 4; ++num_channels)
-        for (size_t depth = CV_8U; depth <= depth_end; ++depth)
-        {
-            Mat src_data(rows * 2, cols * 2, CV_MAKETYPE(depth, num_channels), Scalar(1.0, 2.0, 3.0, 4.0));   
-            Mat src(src_data(Range(rows / 2, rows / 2 + rows), Range(cols / 2, cols / 2 + cols)));
-            vector<Mat> dst;
-            cv::split(src, dst);   
-
-            gpu::GpuMat dev_src(src);
-            vector<gpu::GpuMat> dev_dst;
-            cv::gpu::split(dev_src, dev_dst);
-
-            if (dev_dst.size() != dst.size())
-            {
-                ts->printf(cvtest::TS::CONSOLE, "Bad output sizes");
-                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-            }
-
-            for (size_t i = 0; i < num_channels; ++i)
-            {
-                Mat host_dst = dev_dst[i];
-                double err = norm(dst[i], host_dst, NORM_INF);
-
-                if (err > 1e-3)
-                {
-                    //ts->printf(cvtest::TS::CONSOLE, "\nNorm: %f\n", err);
-                    //ts->printf(cvtest::TS::CONSOLE, "Depth: %d\n", depth);
-                    //ts->printf(cvtest::TS::CONSOLE, "Rows: %d\n", rows);
-                    //ts->printf(cvtest::TS::CONSOLE, "Cols: %d\n", cols);
-                    //ts->printf(cvtest::TS::CONSOLE, "NumChannels: %d\n", num_channels);
-                    ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                    return;
-                }
-            }
-        }
-}
-
-void CV_SplitTest::run(int)
-{
-    can_split(1, 1);
-    can_split(1, 7);
-    can_split(7, 53);
-    can_split_submatrix(1, 1);
-    can_split_submatrix(1, 7);
-    can_split_submatrix(7, 53);
-}
-
-
-////////////////////////////////////////////////////////////////////////////////
-// Split and merge
-
-struct CV_SplitMergeTest : public cvtest::BaseTest
-{
-    void can_split_merge(size_t rows, size_t cols);    
-    void run(int);
-};
-
-void CV_SplitMergeTest::can_split_merge(size_t rows, size_t cols) {
-    bool double_ok = gpu::TargetArchs::builtWith(gpu::NATIVE_DOUBLE) && 
-                     gpu::DeviceInfo().supports(gpu::NATIVE_DOUBLE);
-    size_t depth_end = double_ok ? CV_64F : CV_32F;
-
-    for (size_t num_channels = 1; num_channels <= 4; ++num_channels)
-        for (size_t depth = CV_8U; depth <= depth_end; ++depth)
-        {
-            Mat orig(rows, cols, CV_MAKETYPE(depth, num_channels), Scalar(1.0, 2.0, 3.0, 4.0));   
-            gpu::GpuMat dev_orig(orig);
-            vector<gpu::GpuMat> dev_vec;
-            cv::gpu::split(dev_orig, dev_vec);
-
-            gpu::GpuMat dev_final(rows, cols, CV_MAKETYPE(depth, num_channels));
-            cv::gpu::merge(dev_vec, dev_final);
-
-            double err = cv::norm((Mat)dev_orig, (Mat)dev_final, NORM_INF);
-            if (err > 1e-3)
-            {
-                //ts->printf(cvtest::TS::CONSOLE, "\nNorm: %f\n", err);
-                //ts->printf(cvtest::TS::CONSOLE, "Depth: %d\n", depth);
-                //ts->printf(cvtest::TS::CONSOLE, "Rows: %d\n", rows);
-                //ts->printf(cvtest::TS::CONSOLE, "Cols: %d\n", cols);
-                //ts->printf(cvtest::TS::CONSOLE, "NumChannels: %d\n", num_channels);
-                ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT);
-                return;
-            }
-        }
-}
-
-
-void CV_SplitMergeTest::run(int) 
-{
-    can_split_merge(1, 1);
-    can_split_merge(1, 7);
-    can_split_merge(7, 53);
-}
-
-
-TEST(merge, accuracy) { CV_MergeTest test; test.safe_run(); }
-TEST(split, accuracy) { CV_SplitTest test; test.safe_run(); }
-TEST(split, merge_consistency) { CV_SplitMergeTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_stereo_bm.cpp b/modules/gpu/test/test_stereo_bm.cpp
deleted file mode 100644
index 7a4806e6f5..0000000000
--- a/modules/gpu/test/test_stereo_bm.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-using namespace cv;
-using namespace cv::gpu;
-
-struct CV_GpuStereoBMTest : public cvtest::BaseTest
-{
-    void run_stress()
-    {                
-        RNG rng;
-
-        for(int i = 0; i < 10; ++i)
-        {
-            int winSize = cvRound(rng.uniform(2, 11)) * 2 + 1;
-
-            for(int j = 0; j < 10; ++j)
-            {
-                int ndisp = cvRound(rng.uniform(5, 32)) * 8;
-
-                for(int s = 0; s < 10; ++s)
-                {
-                    int w =  cvRound(rng.uniform(1024, 2048));
-                    int h =  cvRound(rng.uniform(768, 1152));
-
-                    for(int p = 0; p < 2; ++p)
-                    {
-                        //int winSize = winsz[i];
-                        //int disp = disps[j];
-                        Size imgSize(w, h);//res[s];
-                        int preset = p;
-
-                        printf("Preset = %d, nidsp = %d, winsz = %d, width = %d, height = %d\n", p, ndisp, winSize, imgSize.width, imgSize.height);
-
-                        GpuMat l(imgSize, CV_8U);
-                        GpuMat r(imgSize, CV_8U);
-
-                        GpuMat disparity;
-                        StereoBM_GPU bm(preset, ndisp, winSize);
-                        bm(l, r, disparity);
-
-            
-                    }
-                }
-            }
-        }
-    }
-
-    void run(int )
-    {
-        /*run_stress();
-        return;*/
-
-	    cv::Mat img_l = cv::imread(std::string(ts->get_data_path()) + "stereobm/aloe-L.png", 0);
-	    cv::Mat img_r = cv::imread(std::string(ts->get_data_path()) + "stereobm/aloe-R.png", 0);
-	    cv::Mat img_reference = cv::imread(std::string(ts->get_data_path()) + "stereobm/aloe-disp.png", 0);
-
-        if (img_l.empty() || img_r.empty() || img_reference.empty())
-        {
-            ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-            return;
-        }
-
-        cv::gpu::GpuMat disp;
-        cv::gpu::StereoBM_GPU bm(0, 128, 19);
-        bm(cv::gpu::GpuMat(img_l), cv::gpu::GpuMat(img_r), disp);
-
-        disp.convertTo(disp, img_reference.type());
-        double norm = cv::norm((Mat)disp, img_reference, cv::NORM_INF);
-
-        //cv::imwrite(std::string(ts->get_data_path()) + "stereobm/aloe-disp.png", disp);
-
-        /*cv::imshow("disp", disp);
-        cv::imshow("img_reference", img_reference);
-
-        cv::Mat diff = (cv::Mat)disp - (cv::Mat)img_reference;
-        cv::imshow("diff", diff);
-        cv::waitKey();*/
-
-        if (norm >= 100)
-        {
-            ts->printf(cvtest::TS::LOG, "\nStereoBM norm = %f\n", norm);
-            ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-            return;
-        }
-
-        ts->set_failed_test_info(cvtest::TS::OK);
-    }
-};
-
-TEST(StereoBM, regression) { CV_GpuStereoBMTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_stereo_bm_async.cpp b/modules/gpu/test/test_stereo_bm_async.cpp
deleted file mode 100644
index f9632dd283..0000000000
--- a/modules/gpu/test/test_stereo_bm_async.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-using namespace cv;
-using namespace std;
-
-struct CV_AsyncStereoBMTest : public cvtest::BaseTest
-{
-    void run( int /* start_from */)
-    {
-	    cv::Mat img_l = cv::imread(std::string(ts->get_data_path()) + "stereobm/aloe-L.png", 0);
-	    cv::Mat img_r = cv::imread(std::string(ts->get_data_path()) + "stereobm/aloe-R.png", 0);
-	    cv::Mat img_reference = cv::imread(std::string(ts->get_data_path()) + "stereobm/aloe-disp.png", 0);
-
-        if (img_l.empty() || img_r.empty() || img_reference.empty())
-        {
-            ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-            return;
-        }
-
-        cv::gpu::GpuMat disp;
-        cv::gpu::StereoBM_GPU bm(0, 128, 19);
-
-        cv::gpu::Stream stream;
-
-        for (size_t i = 0; i < 50; i++)
-        {
-            bm(cv::gpu::GpuMat(img_l), cv::gpu::GpuMat(img_r), disp, stream);
-        }
-
-        stream.waitForCompletion();
-        disp.convertTo(disp, img_reference.type());
-        double norm = cv::norm((Mat)disp, img_reference, cv::NORM_INF);
-
-        if (norm >= 100)
-        {
-            ts->printf(cvtest::TS::LOG, "\nStereoBM norm = %f\n", norm);
-            ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-            return;
-        }
-
-        ts->set_failed_test_info(cvtest::TS::OK);
-    }
-};
-
-TEST(StereoBM, async) { CV_AsyncStereoBMTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_stereo_bp.cpp b/modules/gpu/test/test_stereo_bp.cpp
deleted file mode 100644
index 7788917375..0000000000
--- a/modules/gpu/test/test_stereo_bp.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-struct CV_GpuStereoBPTest : public cvtest::BaseTest
-{
-    void run(int)
-    {
-        cv::Mat img_l = cv::imread(std::string(ts->get_data_path()) + "stereobp/aloe-L.png");
-        cv::Mat img_r = cv::imread(std::string(ts->get_data_path()) + "stereobp/aloe-R.png");
-        cv::Mat img_template = cv::imread(std::string(ts->get_data_path()) + "stereobp/aloe-disp.png", 0);
-
-        if (img_l.empty() || img_r.empty() || img_template.empty())
-        {
-            ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-            return;
-        }
-
-        {cv::Mat temp; cv::cvtColor(img_l, temp, CV_BGR2BGRA); cv::swap(temp, img_l);}
-        {cv::Mat temp; cv::cvtColor(img_r, temp, CV_BGR2BGRA); cv::swap(temp, img_r);}
-
-        cv::gpu::StereoBeliefPropagation bpm(64, 8, 2, 25, 0.1f, 15, 1, CV_16S);
-        cv::gpu::GpuMat disp;
-
-        bpm(cv::gpu::GpuMat(img_l), cv::gpu::GpuMat(img_r), disp);
-
-        //cv::imwrite(std::string(ts->get_data_path()) + "stereobp/aloe-disp.png", disp);
-
-        disp.convertTo(disp, img_template.type());
-
-        double norm = cv::norm((cv::Mat)disp, img_template, cv::NORM_INF);
-	    if (norm >= 0.5)
-        {
-	        ts->printf(cvtest::TS::LOG, "\nStereoBP norm = %f\n", norm);
-	        ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-	        return;
-        }
-
-        ts->set_failed_test_info(cvtest::TS::OK);
-    }
-};
-
-TEST(StereoBP, regression) { CV_GpuStereoBPTest test; test.safe_run(); }
diff --git a/modules/gpu/test/test_stereo_csbp.cpp b/modules/gpu/test/test_stereo_csbp.cpp
deleted file mode 100644
index a2f794f21b..0000000000
--- a/modules/gpu/test/test_stereo_csbp.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                        Intel License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000, Intel Corporation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of Intel Corporation may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "test_precomp.hpp"
-
-struct CV_GpuStereoCSBPTest : public cvtest::BaseTest
-{
-    void run(int )
-    {
-        cv::Mat img_l = cv::imread(std::string(ts->get_data_path()) + "csstereobp/aloe-L.png");
-        cv::Mat img_r = cv::imread(std::string(ts->get_data_path()) + "csstereobp/aloe-R.png");
-
-        cv::Mat img_template;
-
-        if (cv::gpu::TargetArchs::builtWith(cv::gpu::FEATURE_SET_COMPUTE_20) &&
-            cv::gpu::DeviceInfo().supports(cv::gpu::FEATURE_SET_COMPUTE_20))
-            img_template = cv::imread(std::string(ts->get_data_path()) + "csstereobp/aloe-disp.png", CV_LOAD_IMAGE_GRAYSCALE);
-        else
-            img_template = cv::imread(std::string(ts->get_data_path()) + "csstereobp/aloe-disp_CC1X.png", CV_LOAD_IMAGE_GRAYSCALE);
-
-        if (img_l.empty() || img_r.empty() || img_template.empty())
-        {
-            ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);
-            return;
-        }
-
-        {cv::Mat temp; cv::cvtColor(img_l, temp, CV_BGR2BGRA); cv::swap(temp, img_l);}
-        {cv::Mat temp; cv::cvtColor(img_r, temp, CV_BGR2BGRA); cv::swap(temp, img_r);}
-
-        cv::gpu::GpuMat disp;
-        cv::gpu::StereoConstantSpaceBP bpm(128, 16, 4, 4);
-
-        bpm(cv::gpu::GpuMat(img_l), cv::gpu::GpuMat(img_r), disp);
-
-        //cv::imwrite(std::string(ts->get_data_path()) + "csstereobp/aloe-disp_CC1X.png", cv::Mat(disp));
-
-        disp.convertTo(disp, img_template.type());
-
-        double norm = cv::norm((cv::Mat)disp, img_template, cv::NORM_INF);
-        if (norm >= 1.5)
-        {
-            ts->printf(cvtest::TS::LOG, "\nConstantSpaceStereoBP norm = %f\n", norm);
-            ts->set_failed_test_info(cvtest::TS::FAIL_GENERIC);
-            return;
-        }
-
-        ts->set_failed_test_info(cvtest::TS::OK);
-    }
-};
-
-TEST(StereoConstantSpaceBP, regression) { CV_GpuStereoCSBPTest test; test.safe_run(); }