diff --git a/modules/gpu/perf/perf_arithm.cpp b/modules/gpu/perf/perf_arithm.cpp
deleted file mode 100644
index 31f2f45404..0000000000
--- a/modules/gpu/perf/perf_arithm.cpp
+++ /dev/null
@@ -1,1333 +0,0 @@
-#include "perf_precomp.hpp"
-
-#ifdef HAVE_CUDA
-
-//////////////////////////////////////////////////////////////////////
-// Transpose
-
-GPU_PERF_TEST(Transpose, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::transpose(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Transpose, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_32SC1, CV_64FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// Flip
-
-GPU_PERF_TEST(Flip, cv::gpu::DeviceInfo, cv::Size, perf::MatType, FlipCode)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int flipCode = GET_PARAM(3);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::flip(src, dst, flipCode);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Flip, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
-                        testing::Values((int) HORIZONTAL_AXIS, (int) VERTICAL_AXIS, (int) BOTH_AXIS)));
-
-//////////////////////////////////////////////////////////////////////
-// LUT
-
-GPU_PERF_TEST(LUT, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-    cv::Mat lut(1, 256, CV_8UC1);
-
-    declare.in(src_host, lut, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::LUT(src, lut, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, LUT, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_8UC3)));
-
-//////////////////////////////////////////////////////////////////////
-// CartToPolar
-
-GPU_PERF_TEST(CartToPolar, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat x_host(size, CV_32FC1);
-    cv::Mat y_host(size, CV_32FC1);
-
-    fill(x_host, -100.0, 100.0);
-    fill(y_host, -100.0, 100.0);
-
-    cv::gpu::GpuMat x(x_host);
-    cv::gpu::GpuMat y(y_host);
-    cv::gpu::GpuMat magnitude;
-    cv::gpu::GpuMat angle;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::cartToPolar(x, y, magnitude, angle);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// PolarToCart
-
-GPU_PERF_TEST(PolarToCart, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat magnitude_host(size, CV_32FC1);
-    cv::Mat angle_host(size, CV_32FC1);
-
-    fill(magnitude_host, 0.0, 100.0);
-    fill(angle_host, 0.0, 360.0);
-
-    cv::gpu::GpuMat magnitude(magnitude_host);
-    cv::gpu::GpuMat angle(angle_host);
-    cv::gpu::GpuMat x;
-    cv::gpu::GpuMat y;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::polarToCart(magnitude, angle, x, y, true);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// AddMat
-
-GPU_PERF_TEST(AddMat, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-
-    fill(src1_host, 0.0, 100.0);
-    fill(src2_host, 0.0, 100.0);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::add(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, AddMat, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// AddScalar
-
-GPU_PERF_TEST(AddScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    fill(src_host, 0.0, 100.0);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::Scalar s(1,2,3,4);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::add(src, s, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, AddScalar, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// SubtractMat
-
-GPU_PERF_TEST(SubtractMat, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-
-    fill(src1_host, 0.0, 100.0);
-    fill(src2_host, 0.0, 100.0);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::subtract(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, SubtractMat, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// SubtractScalar
-
-GPU_PERF_TEST(SubtractScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    fill(src_host, 0.0, 100.0);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::Scalar s(1,2,3,4);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::subtract(src, s, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, SubtractScalar, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// MultiplyMat
-
-GPU_PERF_TEST(MultiplyMat, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-
-    fill(src1_host, 0.0, 100.0);
-    fill(src2_host, 0.0, 100.0);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::multiply(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, MultiplyMat, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// MultiplyScalar
-
-GPU_PERF_TEST(MultiplyScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    fill(src_host, 0.0, 100.0);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::Scalar s(1,2,3,4);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::multiply(src, s, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, MultiplyScalar, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// DivideMat
-
-GPU_PERF_TEST(DivideMat, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-
-    fill(src1_host, 0.0, 100.0);
-    fill(src2_host, 0.0, 100.0);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::divide(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, DivideMat, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// DivideScalar
-
-GPU_PERF_TEST(DivideScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    fill(src_host, 0.0, 100.0);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::Scalar s(1,2,3,4);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::divide(src, s, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, DivideScalar, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// Exp
-
-GPU_PERF_TEST(Exp, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, CV_32FC1);
-
-    fill(src_host, 0.0, 10.0);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::exp(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Exp, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// Pow
-
-GPU_PERF_TEST(Pow, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::pow(src, 0.5, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Pow, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// Log
-
-GPU_PERF_TEST(Log, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::log(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Log, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// Sqrt
-
-GPU_PERF_TEST(Sqrt, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::sqrt(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32FC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// Compare
-
-GPU_PERF_TEST(Compare, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-
-    declare.in(src1_host, src2_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::compare(src1, src2, dst, cv::CMP_EQ);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Compare, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// CompareScalar
-
-GPU_PERF_TEST(CompareScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    
-    declare.in(src1_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::Scalar src2 = cv::Scalar::all(123);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::compare(src1, src2, dst, cv::CMP_EQ);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, CompareScalar, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseNot
-
-GPU_PERF_TEST(BitwiseNot, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::bitwise_not(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseNot, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseAnd
-
-GPU_PERF_TEST(BitwiseAnd, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-
-    declare.in(src1_host, src2_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::bitwise_and(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseAnd, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseScalarAnd
-
-GPU_PERF_TEST(BitwiseScalarAnd, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-    cv::Scalar sc = cv::Scalar(123, 123, 123, 123);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::bitwise_and(src, sc, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseScalarAnd, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4)));
-
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseOr
-
-GPU_PERF_TEST(BitwiseOr, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-
-    declare.in(src1_host, src2_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::bitwise_or(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseOr, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseScalarOr
-
-GPU_PERF_TEST(BitwiseScalarOr, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-    cv::Scalar sc = cv::Scalar(123, 123, 123, 123);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::bitwise_or(src, sc, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseScalarOr, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4)));
-
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseXor
-
-GPU_PERF_TEST(BitwiseXor, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-
-    declare.in(src1_host, src2_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::bitwise_xor(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseXor, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseScalarXor
-
-GPU_PERF_TEST(BitwiseScalarXor, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-    cv::Scalar sc = cv::Scalar(123, 123, 123, 123);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::bitwise_xor(src, sc, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseScalarXor, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4)));
-
-
-
-//////////////////////////////////////////////////////////////////////
-// Min
-
-GPU_PERF_TEST(Min, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-
-    declare.in(src1_host, src2_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::min(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Min, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-
-
-//////////////////////////////////////////////////////////////////////
-// Max
-
-GPU_PERF_TEST(Max, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-
-    declare.in(src1_host, src2_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::max(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Max, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// Abs
-
-GPU_PERF_TEST(Abs, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    
-    declare.in(src1_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);    
-    cv::gpu::GpuMat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::abs(src1, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Abs, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_16S, CV_32F)));
-
-
-
-//////////////////////////////////////////////////////////////////////
-// AbsDiff
-
-GPU_PERF_TEST(AbsDiff, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-    
-    declare.in(src1_host, src2_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);    
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::absdiff(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, AbsDiff, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-//////////////////////////////////////////////////////////////////////
-// Phase
-
-GPU_PERF_TEST(Phase, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-        
-    declare.in(src1_host, src2_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);    
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::phase(src1, src2, dst, true);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Phase, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values<perf::MatType>(CV_32FC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// Magnitude
-
-GPU_PERF_TEST(Magnitude, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-        
-    declare.in(src1_host, src2_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);    
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::magnitude(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values<perf::MatType>(CV_32FC1)));
-
-
-
-//////////////////////////////////////////////////////////////////////
-// AbsDiffScalar
-
-GPU_PERF_TEST(AbsDiffScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);        
-    declare.in(src1_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);    
-    cv::Scalar src2 = cv::Scalar::all(123);
-    cv::gpu::GpuMat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::absdiff(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, AbsDiffScalar, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// MeanStdDev
-
-GPU_PERF_TEST(MeanStdDev, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, CV_8UC1);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host); 
-    cv::Scalar mean;
-    cv::Scalar stddev;
-    cv::gpu::GpuMat buf;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::meanStdDev(src, mean, stddev, buf);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// Norm
-
-GPU_PERF_TEST(Norm, cv::gpu::DeviceInfo, cv::Size, perf::MatType, NormType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int normType = GET_PARAM(3);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    double dst;
-    cv::gpu::GpuMat buf;
-
-    TEST_CYCLE()
-    {
-        dst = cv::gpu::norm(src, normType, buf);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Norm, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1),
-                        testing::Values((int) cv::NORM_INF, (int) cv::NORM_L1, (int) cv::NORM_L2)));
-
-//////////////////////////////////////////////////////////////////////
-// NormDiff
-
-GPU_PERF_TEST(NormDiff, cv::gpu::DeviceInfo, cv::Size, NormType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int normType = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, CV_8UC1);
-    cv::Mat src2_host(size, CV_8UC1);
-
-    declare.in(src1_host, src2_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    double dst;
-
-    TEST_CYCLE()
-    {
-        dst = cv::gpu::norm(src1, src2, normType);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, NormDiff, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values((int) cv::NORM_INF, (int) cv::NORM_L1, (int) cv::NORM_L2)));
-
-//////////////////////////////////////////////////////////////////////
-// Sum
-
-GPU_PERF_TEST(Sum, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::Scalar dst;
-    cv::gpu::GpuMat buf;
-
-    TEST_CYCLE()
-    {
-        dst = cv::gpu::sum(src, buf);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Sum, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// MinMax
-
-GPU_PERF_TEST(MinMax, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    double minVal, maxVal;
-    cv::gpu::GpuMat buf;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::minMax(src, &minVal, &maxVal, cv::gpu::GpuMat(), buf);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, MinMax, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// MinMaxLoc
-
-GPU_PERF_TEST(MinMaxLoc, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src(src_host);
-    double minVal, maxVal;
-    cv::Point minLoc, maxLoc;
-    cv::gpu::GpuMat valbuf, locbuf;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc, cv::gpu::GpuMat(), valbuf, locbuf);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// CountNonZero
-
-GPU_PERF_TEST(CountNonZero, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    fill(src_host, 0.0, 1.0);
-
-    cv::gpu::GpuMat src(src_host);
-    int dst;
-    cv::gpu::GpuMat buf;
-
-    TEST_CYCLE()
-    {
-        dst = cv::gpu::countNonZero(src, buf);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// AddWeighted
-
-GPU_PERF_TEST(AddWeighted, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-
-    fill(src1_host, 0.0, 100.0);
-    fill(src2_host, 0.0, 100.0);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::addWeighted(src1, 0.5, src2, 0.5, 0.0, dst);
-    }
-
-    cv::Mat dst_host(dst);
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// Reduce
-
-GPU_PERF_TEST(Reduce, cv::gpu::DeviceInfo, cv::Size, perf::MatType, FlipCode)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int dim = GET_PARAM(3);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src_host(size, type);
-
-    fill(src_host, 0.0, 10.0);
-
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::reduce(src, dst, dim, CV_REDUCE_MIN);
-    }
-
-    cv::Mat dst_host(dst);
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Reduce, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1), 
-                        testing::Values((int) HORIZONTAL_AXIS, (int) VERTICAL_AXIS)));
-
-//////////////////////////////////////////////////////////////////////
-// GEMM
-
-GPU_PERF_TEST(GEMM, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, CV_32FC1);
-    cv::Mat src2_host(size, CV_32FC1);
-    cv::Mat src3_host(size, CV_32FC1);
-
-    fill(src1_host, 0.0, 10.0);
-    fill(src2_host, 0.0, 10.0);
-    fill(src3_host, 0.0, 10.0);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat src3(src3_host);
-    cv::gpu::GpuMat dst;
-
-    declare.time(5.0);
-
-    TEST_CYCLE()
-    {
-        cv::gpu::gemm(src1, src2, 1.0, src3, 1.0, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, GEMM, testing::Combine(
-                        ALL_DEVICES, 
-                        testing::Values(cv::Size(512, 512), cv::Size(1024, 1024), cv::Size(2048, 2048))));
-
-#endif
diff --git a/modules/gpu/perf/perf_calib3d.cpp b/modules/gpu/perf/perf_calib3d.cpp
index 0175527b37..7a9c6c3950 100644
--- a/modules/gpu/perf/perf_calib3d.cpp
+++ b/modules/gpu/perf/perf_calib3d.cpp
@@ -3,200 +3,300 @@
 #ifdef HAVE_CUDA
 
 //////////////////////////////////////////////////////////////////////
-// TransformPoints
+// StereoBM
 
-GPU_PERF_TEST_1(TransformPoints, cv::gpu::DeviceInfo)
+GPU_PERF_TEST_1(StereoBM, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(1, 10000, CV_32FC3);
+    cv::Mat img_l_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_l_host.empty());
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat img_r_host = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_r_host.empty());
 
-    cv::gpu::GpuMat src(src_host);
+    cv::gpu::StereoBM_GPU bm(0, 256);
+    cv::gpu::GpuMat img_l(img_l_host);
+    cv::gpu::GpuMat img_r(img_r_host);
     cv::gpu::GpuMat dst;
 
+    bm(img_l, img_r, dst);
+
+    declare.time(5.0);
+
     TEST_CYCLE()
     {
-        cv::gpu::transformPoints(src, cv::Mat::ones(1, 3, CV_32FC1), cv::Mat::ones(1, 3, CV_32FC1), dst);
+        bm(img_l, img_r, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, TransformPoints, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoBM, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// ProjectPoints
+// StereoBeliefPropagation
 
-GPU_PERF_TEST_1(ProjectPoints, cv::gpu::DeviceInfo)
+GPU_PERF_TEST_1(StereoBeliefPropagation, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(1, 10000, CV_32FC3);
+    cv::Mat img_l_host = readImage("gpu/stereobp/aloe-L.png");
+    ASSERT_FALSE(img_l_host.empty());
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat img_r_host = readImage("gpu/stereobp/aloe-R.png");
+    ASSERT_FALSE(img_r_host.empty());
 
-    cv::gpu::GpuMat src(src_host);
+    cv::gpu::StereoBeliefPropagation bp(64);
+    cv::gpu::GpuMat img_l(img_l_host);
+    cv::gpu::GpuMat img_r(img_r_host);
     cv::gpu::GpuMat dst;
 
+    bp(img_l, img_r, dst);
+
+    declare.time(10.0);
+
     TEST_CYCLE()
     {
-        cv::gpu::projectPoints(src, cv::Mat::ones(1, 3, CV_32FC1), cv::Mat::ones(1, 3, CV_32FC1), cv::Mat::ones(3, 3, CV_32FC1), cv::Mat(), dst);
+        bp(img_l, img_r, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoBeliefPropagation, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// SolvePnPRansac
+// StereoConstantSpaceBP
 
-GPU_PERF_TEST_1(SolvePnPRansac, cv::gpu::DeviceInfo)
+GPU_PERF_TEST_1(StereoConstantSpaceBP, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat object(1, 10000, CV_32FC3);
-    cv::Mat image(1, 10000, CV_32FC2);
+    cv::Mat img_l_host = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_l_host.empty());
 
-    declare.in(object, image, WARMUP_RNG);
+    cv::Mat img_r_host = readImage("gpu/stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_r_host.empty());
+
+    cv::gpu::StereoConstantSpaceBP csbp(128);
+    cv::gpu::GpuMat img_l(img_l_host);
+    cv::gpu::GpuMat img_r(img_r_host);
+    cv::gpu::GpuMat dst;
 
-    cv::Mat rvec, tvec;
+    csbp(img_l, img_r, dst);
 
-    declare.time(3.0);
+    declare.time(10.0);
 
     TEST_CYCLE()
     {
-        cv::gpu::solvePnPRansac(object, image, cv::Mat::ones(3, 3, CV_32FC1), cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), rvec, tvec);
+        csbp(img_l, img_r, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoConstantSpaceBP, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// StereoBM
+// DisparityBilateralFilter
 
-GPU_PERF_TEST_1(StereoBM, cv::gpu::DeviceInfo)
+GPU_PERF_TEST_1(DisparityBilateralFilter, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img_l_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
-    cv::Mat img_r_host = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
+    cv::Mat img_host = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_host.empty());
 
-    ASSERT_FALSE(img_l_host.empty());
-    ASSERT_FALSE(img_r_host.empty());
+    cv::Mat disp_host = readImage("gpu/stereobm/aloe-disp.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(disp_host.empty());
 
-    cv::gpu::GpuMat img_l(img_l_host);
-    cv::gpu::GpuMat img_r(img_r_host);
+    cv::gpu::DisparityBilateralFilter f(128);
+    cv::gpu::GpuMat img(img_host);
+    cv::gpu::GpuMat disp(disp_host);
     cv::gpu::GpuMat dst;
 
-    cv::gpu::StereoBM_GPU bm(0, 256);
-
-    declare.time(5.0);
+    f(disp, img, dst);
 
     TEST_CYCLE()
     {
-        bm(img_l, img_r, dst);
+        f(disp, img, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, StereoBM, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(Calib3D, DisparityBilateralFilter, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// StereoBeliefPropagation
+// TransformPoints
 
-GPU_PERF_TEST_1(StereoBeliefPropagation, cv::gpu::DeviceInfo)
-{
-    cv::gpu::DeviceInfo devInfo = GetParam();
+IMPLEMENT_PARAM_CLASS(Count, int)
 
+GPU_PERF_TEST(TransformPoints, cv::gpu::DeviceInfo, Count)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img_l_host = readImage("gpu/stereobp/aloe-L.png");
-    cv::Mat img_r_host = readImage("gpu/stereobp/aloe-R.png");
+    int count = GET_PARAM(1);
 
-    ASSERT_FALSE(img_l_host.empty());
-    ASSERT_FALSE(img_r_host.empty());
+    cv::Mat src_host(1, count, CV_32FC3);
+    fill(src_host, -100, 100);
 
-    cv::gpu::GpuMat img_l(img_l_host);
-    cv::gpu::GpuMat img_r(img_r_host);
+    cv::gpu::GpuMat src(src_host);
+    cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
+    cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
     cv::gpu::GpuMat dst;
 
-    cv::gpu::StereoBeliefPropagation bp(64);
-
-    declare.time(10.0);
+    cv::gpu::transformPoints(src, rvec, tvec, dst);
 
     TEST_CYCLE()
     {
-        bp(img_l, img_r, dst);
+        cv::gpu::transformPoints(src, rvec, tvec, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, StereoBeliefPropagation, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(Calib3D, TransformPoints, testing::Combine(
+    ALL_DEVICES,
+    testing::Values<Count>(5000, 10000, 20000)));
 
 //////////////////////////////////////////////////////////////////////
-// StereoConstantSpaceBP
+// ProjectPoints
 
-GPU_PERF_TEST_1(StereoConstantSpaceBP, cv::gpu::DeviceInfo)
+GPU_PERF_TEST(ProjectPoints, cv::gpu::DeviceInfo, Count)
 {
-    cv::gpu::DeviceInfo devInfo = GetParam();
-
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img_l_host = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
-    cv::Mat img_r_host = readImage("gpu/stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
+    int count = GET_PARAM(1);
 
-    ASSERT_FALSE(img_l_host.empty());
-    ASSERT_FALSE(img_r_host.empty());
+    cv::Mat src_host(1, count, CV_32FC3);
+    fill(src_host, -100, 100);
 
-    cv::gpu::GpuMat img_l(img_l_host);
-    cv::gpu::GpuMat img_r(img_r_host);
+    cv::gpu::GpuMat src(src_host);
+    cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
+    cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
+    cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
     cv::gpu::GpuMat dst;
 
-    cv::gpu::StereoConstantSpaceBP bp(128);
+    cv::gpu::projectPoints(src, rvec, tvec, camera_mat, cv::Mat(), dst);
 
-    declare.time(10.0);
+    TEST_CYCLE()
+    {
+        cv::gpu::projectPoints(src, rvec, tvec, camera_mat, cv::Mat(), dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, testing::Combine(
+    ALL_DEVICES,
+    testing::Values<Count>(5000, 10000, 20000)));
+
+//////////////////////////////////////////////////////////////////////
+// SolvePnPRansac
+
+GPU_PERF_TEST(SolvePnPRansac, cv::gpu::DeviceInfo, Count)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    int count = GET_PARAM(1);
+
+    cv::Mat object(1, count, CV_32FC3);
+    fill(object, -100, 100);
+
+    cv::Mat camera_mat(3, 3, CV_32FC1);
+    fill(camera_mat, 0.5, 1);
+    camera_mat.at<float>(0, 1) = 0.f;
+    camera_mat.at<float>(1, 0) = 0.f;
+    camera_mat.at<float>(2, 0) = 0.f;
+    camera_mat.at<float>(2, 1) = 0.f;
+
+    cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
+
+    std::vector<cv::Point2f> image_vec;
+    cv::Mat rvec_gold(1, 3, CV_32FC1);
+    fill(rvec_gold, 0, 1);
+    cv::Mat tvec_gold(1, 3, CV_32FC1);
+    fill(tvec_gold, 0, 1);
+    cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
+
+    cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
+
+    cv::Mat rvec;
+    cv::Mat tvec;
+
+    cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
+
+    declare.time(3.0);
 
     TEST_CYCLE()
     {
-        bp(img_l, img_r, dst);
+        cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, StereoConstantSpaceBP, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, testing::Combine(
+    ALL_DEVICES,
+    testing::Values<Count>(5000, 10000, 20000)));
 
 //////////////////////////////////////////////////////////////////////
-// DisparityBilateralFilter
+// ReprojectImageTo3D
 
-GPU_PERF_TEST_1(DisparityBilateralFilter, cv::gpu::DeviceInfo)
+GPU_PERF_TEST(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, MatDepth)
 {
-    cv::gpu::DeviceInfo devInfo = GetParam();
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 5.0, 30.0);
+
+    cv::Mat Q(4, 4, CV_32FC1);
+    fill(Q, 0.1, 1.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::reprojectImageTo3D(src, dst, Q);
 
+    TEST_CYCLE()
+    {
+        cv::gpu::reprojectImageTo3D(src, dst, Q);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, ReprojectImageTo3D, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16S)));
+
+//////////////////////////////////////////////////////////////////////
+// DrawColorDisp
+
+GPU_PERF_TEST(DrawColorDisp, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img_host = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
-    cv::Mat disp_host = readImage("gpu/stereobm/aloe-disp.png", cv::IMREAD_GRAYSCALE);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
 
-    ASSERT_FALSE(img_host.empty());
-    ASSERT_FALSE(disp_host.empty());
+    cv::Mat src_host(size, type);
+    fill(src_host, 0, 255);
 
-    cv::gpu::GpuMat img(img_host);
-    cv::gpu::GpuMat disp(disp_host);
+    cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
-    cv::gpu::DisparityBilateralFilter f(128);
+    cv::gpu::drawColorDisp(src, dst, 255);
 
     TEST_CYCLE()
     {
-        f(disp, img, dst);
+        cv::gpu::drawColorDisp(src, dst, 255);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, DisparityBilateralFilter, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(Calib3D, DrawColorDisp, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16S))));
 
 #endif
 
diff --git a/modules/gpu/perf/perf_core.cpp b/modules/gpu/perf/perf_core.cpp
new file mode 100644
index 0000000000..cbe7148443
--- /dev/null
+++ b/modules/gpu/perf/perf_core.cpp
@@ -0,0 +1,1881 @@
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+//////////////////////////////////////////////////////////////////////
+// Merge
+
+GPU_PERF_TEST(Merge, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    int channels = GET_PARAM(3);
+
+    std::vector<cv::gpu::GpuMat> src(channels);
+    for (int i = 0; i < channels; ++i)
+        src[i] = cv::gpu::GpuMat(size, depth, cv::Scalar::all(i));
+
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::merge(src, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::merge(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Merge, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F),
+    testing::Values<Channels>(2, 3, 4)));
+
+//////////////////////////////////////////////////////////////////////
+// Split
+
+GPU_PERF_TEST(Split, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    int channels = GET_PARAM(3);
+
+    cv::gpu::GpuMat src(size, CV_MAKE_TYPE(depth, channels), cv::Scalar(1, 2, 3, 4));
+
+    std::vector<cv::gpu::GpuMat> dst;
+
+    cv::gpu::split(src, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::split(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Split, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F),
+    testing::Values<Channels>(2, 3, 4)));
+
+//////////////////////////////////////////////////////////////////////
+// Add_Mat
+
+GPU_PERF_TEST(Add_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1_host(size, depth);
+    fill(src1_host, 0.0, 100.0);
+
+    cv::Mat src2_host(size, depth);
+    fill(src2_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::add(src1, src2, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::add(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Add_Mat, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Add_Scalar
+
+GPU_PERF_TEST(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::Scalar s(1, 2, 3, 4);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::add(src, s, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::add(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Add_Scalar, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Subtract_Mat
+
+GPU_PERF_TEST(Subtract_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1_host(size, depth);
+    fill(src1_host, 0.0, 100.0);
+
+    cv::Mat src2_host(size, depth);
+    fill(src2_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::subtract(src1, src2, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::subtract(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Subtract_Mat, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Subtract_Scalar
+
+GPU_PERF_TEST(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::Scalar s(1, 2, 3, 4);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::subtract(src, s, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::subtract(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Subtract_Scalar, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Multiply_Mat
+
+GPU_PERF_TEST(Multiply_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1_host(size, depth);
+    fill(src1_host, 0.0, 100.0);
+
+    cv::Mat src2_host(size, depth);
+    fill(src2_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::multiply(src1, src2, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::multiply(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Multiply_Mat, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Multiply_Scalar
+
+GPU_PERF_TEST(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::Scalar s(1, 2, 3, 4);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::multiply(src, s, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::multiply(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Multiply_Scalar, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Divide_Mat
+
+GPU_PERF_TEST(Divide_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1_host(size, depth);
+    fill(src1_host, 0.0, 100.0);
+
+    cv::Mat src2_host(size, depth);
+    fill(src2_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::divide(src1, src2, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::divide(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Divide_Mat, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Divide_Scalar
+
+GPU_PERF_TEST(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::Scalar s(1, 2, 3, 4);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::divide(src, s, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::divide(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Divide_Scalar, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Divide_Scalar_Inv
+
+GPU_PERF_TEST(Divide_Scalar_Inv, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src(src_host);
+    double scale = 100.0;
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::divide(scale, src, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::divide(scale, src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Divide_Scalar_Inv, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// AbsDiff_Mat
+
+GPU_PERF_TEST(AbsDiff_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1_host(size, depth);
+    fill(src1_host, 0.0, 100.0);
+
+    cv::Mat src2_host(size, depth);
+    fill(src2_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::absdiff(src1, src2, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::absdiff(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, AbsDiff_Mat, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// AbsDiff_Scalar
+
+GPU_PERF_TEST(AbsDiff_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::Scalar s(1, 2, 3, 4);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::absdiff(src, s, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::absdiff(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, AbsDiff_Scalar, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Abs
+
+GPU_PERF_TEST(Abs, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::abs(src, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::abs(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Abs, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_16S, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// Sqr
+
+GPU_PERF_TEST(Sqr, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::sqr(src, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::sqr(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Sqr, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16S, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// Sqrt
+
+GPU_PERF_TEST(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::sqrt(src, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::sqrt(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Sqrt, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16S, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// Log
+
+GPU_PERF_TEST(Log, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 1.0, 100.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::log(src, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::log(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Log, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16S, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// Exp
+
+GPU_PERF_TEST(Exp, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 1.0, 10.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::exp(src, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::exp(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Exp, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16S, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// Pow
+
+GPU_PERF_TEST(Pow, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src_host(size, depth);
+    fill(src_host, 1.0, 10.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::pow(src, 2.3, dst);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::pow(src, 2.3, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Pow, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16S, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// Compare_Mat
+
+CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
+#define ALL_CMP_CODES testing::Values(CmpCode(cv::CMP_EQ), CmpCode(cv::CMP_NE), CmpCode(cv::CMP_GT), CmpCode(cv::CMP_GE), CmpCode(cv::CMP_LT), CmpCode(cv::CMP_LE))
+
+GPU_PERF_TEST(Compare_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth, CmpCode)
+{
+    // Times the matrix-vs-matrix overload of cv::gpu::compare for each comparison code.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    const int cmpOp = GET_PARAM(3);
+    // Two host operands, each prepared by fill() with the arguments 0 and 100.0.
+    cv::Mat h_lhs(matSize, matDepth);
+    fill(h_lhs, 0, 100.0);
+
+    cv::Mat h_rhs(matSize, matDepth);
+    fill(h_rhs, 0, 100.0);
+    // Device matrices built from the host operands, plus an empty output.
+    cv::gpu::GpuMat d_lhs(h_lhs);
+    cv::gpu::GpuMat d_rhs(h_rhs);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::compare(d_lhs, d_rhs, d_dst, cmpOp);
+    TEST_CYCLE()
+    {
+        cv::gpu::compare(d_lhs, d_rhs, d_dst, cmpOp);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Compare_Mat,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F),
+                     ALL_CMP_CODES));
+
+//////////////////////////////////////////////////////////////////////
+// Compare_Scalar
+
+GPU_PERF_TEST(Compare_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, CmpCode)
+{
+    // Times the matrix-vs-scalar overload of cv::gpu::compare for each comparison code.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    const int cmpOp = GET_PARAM(3);
+    // Host input prepared by fill() with the arguments 0 and 100.0.
+    cv::Mat h_src(matSize, matDepth);
+    fill(h_src, 0, 100.0);
+    // Device input, the scalar operand (all components 50) and an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    const cv::Scalar scalar = cv::Scalar::all(50);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::compare(d_src, scalar, d_dst, cmpOp);
+    TEST_CYCLE()
+    {
+        cv::gpu::compare(d_src, scalar, d_dst, cmpOp);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Compare_Scalar,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F),
+                     ALL_CMP_CODES));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_Not
+
+GPU_PERF_TEST(Bitwise_Not, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    // Times cv::gpu::bitwise_not per device/size/depth combination.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    // Host input prepared by fill() with the arguments 0 and 100.0.
+    cv::Mat h_src(matSize, matDepth);
+    fill(h_src, 0, 100.0);
+    // Device matrix built from the host input, plus an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::bitwise_not(d_src, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::bitwise_not(d_src, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_Not,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S)));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_And_Mat
+
+GPU_PERF_TEST(Bitwise_And_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    // Times the matrix-vs-matrix overload of cv::gpu::bitwise_and.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    // Two host operands, each prepared by fill() with the arguments 0 and 100.0.
+    cv::Mat h_lhs(matSize, matDepth);
+    fill(h_lhs, 0, 100.0);
+
+    cv::Mat h_rhs(matSize, matDepth);
+    fill(h_rhs, 0, 100.0);
+    // Device matrices built from the host operands, plus an empty output.
+    cv::gpu::GpuMat d_lhs(h_lhs);
+    cv::gpu::GpuMat d_rhs(h_rhs);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::bitwise_and(d_lhs, d_rhs, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::bitwise_and(d_lhs, d_rhs, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_And_Mat,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S)));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_And_Scalar
+
+GPU_PERF_TEST(Bitwise_And_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    // Times the matrix-vs-scalar overload of cv::gpu::bitwise_and.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    const int cn = GET_PARAM(3);
+    // Combine depth and channel count into the full matrix type.
+    const int matType = CV_MAKE_TYPE(matDepth, cn);
+    // Host input prepared by fill() with the arguments 0 and 100.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0, 100.0);
+    // Device input, the scalar operand (50 in all four components) and an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    const cv::Scalar scalar = cv::Scalar(50, 50, 50, 50);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::bitwise_and(d_src, scalar, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::bitwise_and(d_src, scalar, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_And_Scalar,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S),
+                     testing::Values<Channels>(1, 3, 4)));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_Or_Mat
+
+GPU_PERF_TEST(Bitwise_Or_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    // Times the matrix-vs-matrix overload of cv::gpu::bitwise_or.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    // Two host operands, each prepared by fill() with the arguments 0 and 100.0.
+    cv::Mat h_lhs(matSize, matDepth);
+    fill(h_lhs, 0, 100.0);
+
+    cv::Mat h_rhs(matSize, matDepth);
+    fill(h_rhs, 0, 100.0);
+    // Device matrices built from the host operands, plus an empty output.
+    cv::gpu::GpuMat d_lhs(h_lhs);
+    cv::gpu::GpuMat d_rhs(h_rhs);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::bitwise_or(d_lhs, d_rhs, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::bitwise_or(d_lhs, d_rhs, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_Or_Mat,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S)));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_Or_Scalar
+
+GPU_PERF_TEST(Bitwise_Or_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    // Times the matrix-vs-scalar overload of cv::gpu::bitwise_or.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    const int cn = GET_PARAM(3);
+    // Combine depth and channel count into the full matrix type.
+    const int matType = CV_MAKE_TYPE(matDepth, cn);
+    // Host input prepared by fill() with the arguments 0 and 100.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0, 100.0);
+    // Device input, the scalar operand (50 in all four components) and an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    const cv::Scalar scalar = cv::Scalar(50, 50, 50, 50);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::bitwise_or(d_src, scalar, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::bitwise_or(d_src, scalar, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_Or_Scalar,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S),
+                     testing::Values<Channels>(1, 3, 4)));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_Xor_Mat
+
+GPU_PERF_TEST(Bitwise_Xor_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    // Times the matrix-vs-matrix overload of cv::gpu::bitwise_xor.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    // Two host operands, each prepared by fill() with the arguments 0 and 100.0.
+    cv::Mat h_lhs(matSize, matDepth);
+    fill(h_lhs, 0, 100.0);
+
+    cv::Mat h_rhs(matSize, matDepth);
+    fill(h_rhs, 0, 100.0);
+    // Device matrices built from the host operands, plus an empty output.
+    cv::gpu::GpuMat d_lhs(h_lhs);
+    cv::gpu::GpuMat d_rhs(h_rhs);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::bitwise_xor(d_lhs, d_rhs, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::bitwise_xor(d_lhs, d_rhs, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_Xor_Mat,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S)));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_Xor_Scalar
+
+GPU_PERF_TEST(Bitwise_Xor_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    // Times the matrix-vs-scalar overload of cv::gpu::bitwise_xor.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    const int cn = GET_PARAM(3);
+    // Combine depth and channel count into the full matrix type.
+    const int matType = CV_MAKE_TYPE(matDepth, cn);
+    // Host input prepared by fill() with the arguments 0 and 100.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0, 100.0);
+    // Device input, the scalar operand (50 in all four components) and an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    const cv::Scalar scalar = cv::Scalar(50, 50, 50, 50);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::bitwise_xor(d_src, scalar, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::bitwise_xor(d_src, scalar, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_Xor_Scalar,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S),
+                     testing::Values<Channels>(1, 3, 4)));
+
+//////////////////////////////////////////////////////////////////////
+// RShift
+
+GPU_PERF_TEST(RShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    // Times cv::gpu::rshift with a shift of 4 in every scalar component.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    const int cn = GET_PARAM(3);
+    // Combine depth and channel count into the full matrix type.
+    const int matType = CV_MAKE_TYPE(matDepth, cn);
+    // Host input prepared by fill() with the arguments 0 and 255.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0, 255.0);
+    // Device input, the integer shift amounts and an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    const cv::Scalar_<int> shift = cv::Scalar_<int>::all(4);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::rshift(d_src, shift, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::rshift(d_src, shift, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, RShift,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S),
+                     testing::Values<Channels>(1, 3, 4)));
+
+//////////////////////////////////////////////////////////////////////
+// LShift
+
+GPU_PERF_TEST(LShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    // Times cv::gpu::lshift with a shift of 4 in every scalar component.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    const int cn = GET_PARAM(3);
+    // Combine depth and channel count into the full matrix type.
+    const int matType = CV_MAKE_TYPE(matDepth, cn);
+    // Host input prepared by fill() with the arguments 0 and 255.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0, 255.0);
+    // Device input, the integer shift amounts and an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    const cv::Scalar_<int> shift = cv::Scalar_<int>::all(4);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::lshift(d_src, shift, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::lshift(d_src, shift, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, LShift,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S),
+                     testing::Values<Channels>(1, 3, 4)));
+
+//////////////////////////////////////////////////////////////////////
+// Min_Mat
+
+GPU_PERF_TEST(Min_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    // Times the matrix-vs-matrix overload of cv::gpu::min.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    // Two host operands, each prepared by fill() with the arguments 0 and 255.0.
+    cv::Mat h_lhs(matSize, matDepth);
+    fill(h_lhs, 0, 255.0);
+
+    cv::Mat h_rhs(matSize, matDepth);
+    fill(h_rhs, 0, 255.0);
+    // Device matrices built from the host operands, plus an empty output.
+    cv::gpu::GpuMat d_lhs(h_lhs);
+    cv::gpu::GpuMat d_rhs(h_rhs);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::min(d_lhs, d_rhs, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::min(d_lhs, d_rhs, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Min_Mat,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// Min_Scalar
+
+GPU_PERF_TEST(Min_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    // Times the matrix-vs-scalar overload of cv::gpu::min with the value 50.0.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    // Host input prepared by fill() with the arguments 0 and 255.0.
+    cv::Mat h_src(matSize, matDepth);
+    fill(h_src, 0, 255.0);
+    // Device input, the scalar operand and an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    const double bound = 50.0;
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::min(d_src, bound, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::min(d_src, bound, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Min_Scalar,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// Max_Mat
+
+GPU_PERF_TEST(Max_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    // Times the matrix-vs-matrix overload of cv::gpu::max.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    // Two host operands, each prepared by fill() with the arguments 0 and 255.0.
+    cv::Mat h_lhs(matSize, matDepth);
+    fill(h_lhs, 0, 255.0);
+
+    cv::Mat h_rhs(matSize, matDepth);
+    fill(h_rhs, 0, 255.0);
+    // Device matrices built from the host operands, plus an empty output.
+    cv::gpu::GpuMat d_lhs(h_lhs);
+    cv::gpu::GpuMat d_rhs(h_rhs);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::max(d_lhs, d_rhs, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::max(d_lhs, d_rhs, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Max_Mat,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// Max_Scalar
+
+GPU_PERF_TEST(Max_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    // Times the matrix-vs-scalar overload of cv::gpu::max with the value 50.0.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    // Host input prepared by fill() with the arguments 0 and 255.0.
+    cv::Mat h_src(matSize, matDepth);
+    fill(h_src, 0, 255.0);
+    // Device input, the scalar operand and an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    const double bound = 50.0;
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::max(d_src, bound, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::max(d_src, bound, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Max_Scalar,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// AddWeighted
+
+GPU_PERF_TEST(AddWeighted, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth, MatDepth)
+{
+    // Times cv::gpu::addWeighted (weights 0.5/0.5, offset 10.0) over input and output depths.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int lhsDepth = GET_PARAM(2);
+    const int rhsDepth = GET_PARAM(3);
+    const int outDepth = GET_PARAM(4);
+    // Host operands with independent depths, each prepared by fill() with 0 and 100.0.
+    cv::Mat h_lhs(matSize, lhsDepth);
+    fill(h_lhs, 0, 100.0);
+
+    cv::Mat h_rhs(matSize, rhsDepth);
+    fill(h_rhs, 0, 100.0);
+    // Device matrices built from the host operands, plus an empty output.
+    cv::gpu::GpuMat d_lhs(h_lhs);
+    cv::gpu::GpuMat d_rhs(h_rhs);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::addWeighted(d_lhs, 0.5, d_rhs, 0.5, 10.0, d_dst, outDepth);
+    TEST_CYCLE()
+    {
+        cv::gpu::addWeighted(d_lhs, 0.5, d_rhs, 0.5, 10.0, d_dst, outDepth);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, AddWeighted,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F),
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F),
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// GEMM
+// ALL_GEMM_FLAGS enumerates every combination of the three transpose flags (2^3 = 8 values).
+CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T)
+#define ALL_GEMM_FLAGS testing::Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_2_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
+
+GPU_PERF_TEST(GEMM, cv::gpu::DeviceInfo, cv::Size, MatType, GemmFlags)
+{
+    // Times cv::gpu::gemm (alpha = beta = 1.0) for each matrix type and flag value.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matType = GET_PARAM(2);
+    const int gemmFlags = GET_PARAM(3);
+    // Three host operands, each prepared by fill() with the arguments 0.0 and 10.0.
+    cv::Mat h_a(matSize, matType);
+    fill(h_a, 0.0, 10.0);
+
+    cv::Mat h_b(matSize, matType);
+    fill(h_b, 0.0, 10.0);
+
+    cv::Mat h_c(matSize, matType);
+    fill(h_c, 0.0, 10.0);
+    // Device matrices built from the host operands, plus an empty output.
+    cv::gpu::GpuMat d_a(h_a);
+    cv::gpu::GpuMat d_b(h_b);
+    cv::gpu::GpuMat d_c(h_c);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::gemm(d_a, d_b, 1.0, d_c, 1.0, d_dst, gemmFlags);
+    // NOTE(review): presumably sets the perf framework's time budget for this test; confirm units.
+    declare.time(5.0);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::gemm(d_a, d_b, 1.0, d_c, 1.0, d_dst, gemmFlags);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, GEMM,
+    testing::Combine(ALL_DEVICES,
+                     testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
+                     testing::Values<MatType>(CV_32FC1, CV_32FC2, CV_64FC1, CV_64FC2),
+                     ALL_GEMM_FLAGS));
+
+//////////////////////////////////////////////////////////////////////
+// Transpose
+
+GPU_PERF_TEST(Transpose, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    // Times cv::gpu::transpose per device/size/type combination.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matType = GET_PARAM(2);
+    // Host input prepared by fill() with the arguments 0.0 and 100.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0.0, 100.0);
+    // Device matrix built from the host input, plus an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::transpose(d_src, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::transpose(d_src, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Transpose,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatType>(CV_8UC1, CV_8UC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32SC2, CV_64FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// Flip
+// Labels follow the documented flipCode convention: 0 = around the x-axis, > 0 = around the y-axis, < 0 = both axes.
+enum {FLIP_X = 0, FLIP_Y = 1, FLIP_BOTH = -1};
+CV_ENUM(FlipCode, FLIP_X, FLIP_Y, FLIP_BOTH)
+#define ALL_FLIP_CODES testing::Values(FlipCode(FLIP_X), FlipCode(FLIP_Y), FlipCode(FLIP_BOTH))
+
+GPU_PERF_TEST(Flip, cv::gpu::DeviceInfo, cv::Size, MatType, FlipCode)
+{
+    // Times cv::gpu::flip for each matrix type and flip code.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matType = GET_PARAM(2);
+    const int flipMode = GET_PARAM(3);
+    // Host input prepared by fill() with the arguments 0.0 and 100.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0.0, 100.0);
+    // Device matrix built from the host input, plus an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::flip(d_src, d_dst, flipMode);
+    TEST_CYCLE()
+    {
+        cv::gpu::flip(d_src, d_dst, flipMode);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Flip,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatType>(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+                     ALL_FLIP_CODES));
+
+//////////////////////////////////////////////////////////////////////
+// LUT_OneChannel
+
+GPU_PERF_TEST(LUT_OneChannel, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    // Times cv::gpu::LUT with a single-channel 1x256 lookup table.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matType = GET_PARAM(2);
+    // Host input prepared by fill() with the arguments 0.0 and 100.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0.0, 100.0);
+    // The table stays a host cv::Mat of type CV_8UC1 and is passed to LUT as such.
+    cv::Mat table(1, 256, CV_8UC1);
+    fill(table, 0.0, 100.0);
+    // Device matrix built from the host input, plus an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::LUT(d_src, table, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::LUT(d_src, table, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, LUT_OneChannel,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatType>(CV_8UC1, CV_8UC3)));
+
+//////////////////////////////////////////////////////////////////////
+// LUT_MultiChannel
+
+GPU_PERF_TEST(LUT_MultiChannel, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    // Times cv::gpu::LUT with a lookup table that has as many channels as the input.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matType = GET_PARAM(2);
+    // Host input prepared by fill() with the arguments 0.0 and 100.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0.0, 100.0);
+    // The 1x256 table stays a host cv::Mat; its channel count is taken from the input.
+    cv::Mat table(1, 256, CV_MAKE_TYPE(CV_8U, h_src.channels()));
+    fill(table, 0.0, 100.0);
+    // Device matrix built from the host input, plus an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::LUT(d_src, table, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::LUT(d_src, table, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, LUT_MultiChannel,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatType>(CV_8UC3)));
+
+//////////////////////////////////////////////////////////////////////
+// Magnitude_Complex
+
+GPU_PERF_TEST(Magnitude_Complex, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Times the single-input overload of cv::gpu::magnitude on a CV_32FC2 matrix.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    // Two-channel float host input prepared by fill() with -100.0 and 100.0.
+    cv::Mat h_src(matSize, CV_32FC2);
+    fill(h_src, -100.0, 100.0);
+    // Device matrix built from the host input, plus an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::magnitude(d_src, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::magnitude(d_src, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Magnitude_Complex,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Magnitude_Sqr_Complex
+
+GPU_PERF_TEST(Magnitude_Sqr_Complex, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Times the single-input overload of cv::gpu::magnitudeSqr on a CV_32FC2 matrix.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    // Two-channel float host input prepared by fill() with -100.0 and 100.0.
+    cv::Mat h_src(matSize, CV_32FC2);
+    fill(h_src, -100.0, 100.0);
+    // Device matrix built from the host input, plus an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::magnitudeSqr(d_src, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::magnitudeSqr(d_src, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Magnitude_Sqr_Complex,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Magnitude
+
+GPU_PERF_TEST(Magnitude, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Times the two-input overload of cv::gpu::magnitude on CV_32FC1 matrices.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    // Two single-channel float host inputs, each prepared by fill() with -100.0 and 100.0.
+    cv::Mat h_x(matSize, CV_32FC1);
+    fill(h_x, -100.0, 100.0);
+
+    cv::Mat h_y(matSize, CV_32FC1);
+    fill(h_y, -100.0, 100.0);
+    // Device matrices built from the host inputs, plus an empty output.
+    cv::gpu::GpuMat d_x(h_x);
+    cv::gpu::GpuMat d_y(h_y);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::magnitude(d_x, d_y, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::magnitude(d_x, d_y, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Magnitude,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Magnitude_Sqr
+
+GPU_PERF_TEST(Magnitude_Sqr, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Times the two-input overload of cv::gpu::magnitudeSqr on CV_32FC1 matrices.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    // Two single-channel float host inputs, each prepared by fill() with -100.0 and 100.0.
+    cv::Mat h_x(matSize, CV_32FC1);
+    fill(h_x, -100.0, 100.0);
+
+    cv::Mat h_y(matSize, CV_32FC1);
+    fill(h_y, -100.0, 100.0);
+    // Device matrices built from the host inputs, plus an empty output.
+    cv::gpu::GpuMat d_x(h_x);
+    cv::gpu::GpuMat d_y(h_y);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::magnitudeSqr(d_x, d_y, d_dst);
+    TEST_CYCLE()
+    {
+        cv::gpu::magnitudeSqr(d_x, d_y, d_dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Magnitude_Sqr,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Phase
+
+IMPLEMENT_PARAM_CLASS(AngleInDegrees, bool)
+
+GPU_PERF_TEST(Phase, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees)
+{
+    // Times cv::gpu::phase on CV_32FC1 inputs with the degrees flag off and on.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const bool inDegrees = GET_PARAM(2);
+    // Two single-channel float host inputs, each prepared by fill() with -100.0 and 100.0.
+    cv::Mat h_x(matSize, CV_32FC1);
+    fill(h_x, -100.0, 100.0);
+
+    cv::Mat h_y(matSize, CV_32FC1);
+    fill(h_y, -100.0, 100.0);
+    // Device matrices built from the host inputs, plus an empty output.
+    cv::gpu::GpuMat d_x(h_x);
+    cv::gpu::GpuMat d_y(h_y);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::phase(d_x, d_y, d_dst, inDegrees);
+    TEST_CYCLE()
+    {
+        cv::gpu::phase(d_x, d_y, d_dst, inDegrees);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Phase,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<AngleInDegrees>(false, true)));
+
+//////////////////////////////////////////////////////////////////////
+// CartToPolar
+
+GPU_PERF_TEST(CartToPolar, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees)
+{
+    // Times cv::gpu::cartToPolar on CV_32FC1 inputs with the degrees flag off and on.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const bool inDegrees = GET_PARAM(2);
+    // Two single-channel float host inputs, each prepared by fill() with -100.0 and 100.0.
+    cv::Mat h_x(matSize, CV_32FC1);
+    fill(h_x, -100.0, 100.0);
+
+    cv::Mat h_y(matSize, CV_32FC1);
+    fill(h_y, -100.0, 100.0);
+    // Device matrices built from the host inputs, plus two empty outputs.
+    cv::gpu::GpuMat d_x(h_x);
+    cv::gpu::GpuMat d_y(h_y);
+    cv::gpu::GpuMat d_magnitude;
+    cv::gpu::GpuMat d_angle;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::cartToPolar(d_x, d_y, d_magnitude, d_angle, inDegrees);
+    TEST_CYCLE()
+    {
+        cv::gpu::cartToPolar(d_x, d_y, d_magnitude, d_angle, inDegrees);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, CartToPolar,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<AngleInDegrees>(false, true)));
+
+//////////////////////////////////////////////////////////////////////
+// PolarToCart
+
+GPU_PERF_TEST(PolarToCart, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees)
+{
+    // Times cv::gpu::polarToCart on CV_32FC1 inputs with the degrees flag off and on.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const bool inDegrees = GET_PARAM(2);
+    // Host magnitudes use fill() bounds 0.0/100.0; the angle bound is 360.0 or 2*pi by unit.
+    cv::Mat h_magnitude(matSize, CV_32FC1);
+    fill(h_magnitude, 0.0, 100.0);
+
+    cv::Mat h_angle(matSize, CV_32FC1);
+    fill(h_angle, 0.0, inDegrees ? 360.0 : 2 * CV_PI);
+    // Device matrices built from the host inputs, plus two empty outputs.
+    cv::gpu::GpuMat d_magnitude(h_magnitude);
+    cv::gpu::GpuMat d_angle(h_angle);
+    cv::gpu::GpuMat d_x;
+    cv::gpu::GpuMat d_y;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::polarToCart(d_magnitude, d_angle, d_x, d_y, inDegrees);
+    TEST_CYCLE()
+    {
+        cv::gpu::polarToCart(d_magnitude, d_angle, d_x, d_y, inDegrees);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, PolarToCart,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<AngleInDegrees>(false, true)));
+
+//////////////////////////////////////////////////////////////////////
+// MeanStdDev
+
+GPU_PERF_TEST(MeanStdDev, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Times cv::gpu::meanStdDev on a CV_8UC1 matrix with a caller-supplied buffer.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    // Host input prepared by fill() with the arguments 0.0 and 255.0.
+    cv::Mat h_src(matSize, CV_8UC1);
+    fill(h_src, 0.0, 255.0);
+    // Device input, the two result scalars and the buffer passed on every call.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::Scalar meanVal;
+    cv::Scalar stddevVal;
+    cv::gpu::GpuMat d_buf;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::meanStdDev(d_src, meanVal, stddevVal, d_buf);
+    TEST_CYCLE()
+    {
+        cv::gpu::meanStdDev(d_src, meanVal, stddevVal, d_buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, MeanStdDev,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Norm
+
+GPU_PERF_TEST(Norm, cv::gpu::DeviceInfo, cv::Size, MatDepth, NormType)
+{
+    // Times the single-matrix overload of cv::gpu::norm for each depth and norm type.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    const int normKind = GET_PARAM(3);
+    // Host input prepared by fill() with the arguments 0.0 and 255.0.
+    cv::Mat h_src(matSize, matDepth);
+    fill(h_src, 0.0, 255.0);
+    // Device input, the result slot and the buffer passed on every call.
+    cv::gpu::GpuMat d_src(h_src);
+    double result;
+    cv::gpu::GpuMat d_buf;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    result = cv::gpu::norm(d_src, normKind, d_buf);
+    TEST_CYCLE()
+    {
+        result = cv::gpu::norm(d_src, normKind, d_buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Norm,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S, CV_32F),
+                     testing::Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))));
+
+//////////////////////////////////////////////////////////////////////
+// NormDiff
+
+GPU_PERF_TEST(NormDiff, cv::gpu::DeviceInfo, cv::Size, NormType)
+{
+    // Times the two-matrix overload of cv::gpu::norm on CV_8UC1 inputs.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int normKind = GET_PARAM(2);
+    // Two host inputs, each prepared by fill() with the arguments 0.0 and 255.0.
+    cv::Mat h_lhs(matSize, CV_8UC1);
+    fill(h_lhs, 0.0, 255.0);
+
+    cv::Mat h_rhs(matSize, CV_8UC1);
+    fill(h_rhs, 0.0, 255.0);
+    // Device matrices built from the host inputs, plus the result slot.
+    cv::gpu::GpuMat d_lhs(h_lhs);
+    cv::gpu::GpuMat d_rhs(h_rhs);
+    double result;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    result = cv::gpu::norm(d_lhs, d_rhs, normKind);
+    TEST_CYCLE()
+    {
+        result = cv::gpu::norm(d_lhs, d_rhs, normKind);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, NormDiff,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))));
+
+//////////////////////////////////////////////////////////////////////
+// Sum
+
+GPU_PERF_TEST(Sum, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    // Times cv::gpu::sum with a caller-supplied buffer for each matrix type.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matType = GET_PARAM(2);
+    // Host input prepared by fill() with the arguments 0.0 and 255.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0.0, 255.0);
+    // Device input, the result slot and the buffer passed on every call.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::Scalar result;
+    cv::gpu::GpuMat d_buf;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    result = cv::gpu::sum(d_src, d_buf);
+    TEST_CYCLE()
+    {
+        result = cv::gpu::sum(d_src, d_buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Sum,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatType>(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+
+//////////////////////////////////////////////////////////////////////
+// Sum_Abs
+
+GPU_PERF_TEST(Sum_Abs, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    // Times cv::gpu::absSum with a caller-supplied buffer for each matrix type.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matType = GET_PARAM(2);
+    // Host input prepared by fill() with the arguments 0.0 and 255.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0.0, 255.0);
+    // Device input, the result slot and the buffer passed on every call.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::Scalar result;
+    cv::gpu::GpuMat d_buf;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    result = cv::gpu::absSum(d_src, d_buf);
+    TEST_CYCLE()
+    {
+        result = cv::gpu::absSum(d_src, d_buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Sum_Abs,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatType>(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+
+//////////////////////////////////////////////////////////////////////
+// Sum_Sqr
+
+GPU_PERF_TEST(Sum_Sqr, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    // Times cv::gpu::sqrSum with a caller-supplied buffer for each matrix type.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matType = GET_PARAM(2);
+    // Host input prepared by fill() with the arguments 0.0 and 255.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0.0, 255.0);
+    // Device input, the result slot and the buffer passed on every call.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::Scalar result;
+    cv::gpu::GpuMat d_buf;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    result = cv::gpu::sqrSum(d_src, d_buf);
+    TEST_CYCLE()
+    {
+        result = cv::gpu::sqrSum(d_src, d_buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Sum_Sqr,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatType>(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+
+//////////////////////////////////////////////////////////////////////
+// MinMax
+
+GPU_PERF_TEST(MinMax, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    // Times cv::gpu::minMax with an empty mask and a caller-supplied buffer.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    // Host input prepared by fill() with the arguments 0.0 and 255.0.
+    cv::Mat h_src(matSize, matDepth);
+    fill(h_src, 0.0, 255.0);
+    // Device input, the two output values and the buffer passed on every call.
+    cv::gpu::GpuMat d_src(h_src);
+    double lowest, highest;
+    cv::gpu::GpuMat d_buf;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::minMax(d_src, &lowest, &highest, cv::gpu::GpuMat(), d_buf);
+    TEST_CYCLE()
+    {
+        cv::gpu::minMax(d_src, &lowest, &highest, cv::gpu::GpuMat(), d_buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, MinMax,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// MinMaxLoc
+
+GPU_PERF_TEST(MinMaxLoc, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    // Times cv::gpu::minMaxLoc with an empty mask and caller-supplied value/location buffers.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    // Host input prepared by fill() with the arguments 0.0 and 255.0.
+    cv::Mat h_src(matSize, matDepth);
+    fill(h_src, 0.0, 255.0);
+    // Device input, the output values and points, and the two buffers passed on every call.
+    cv::gpu::GpuMat d_src(h_src);
+    double lowest, highest;
+    cv::Point lowestAt, highestAt;
+    cv::gpu::GpuMat d_valbuf, d_locbuf;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::minMaxLoc(d_src, &lowest, &highest, &lowestAt, &highestAt, cv::gpu::GpuMat(), d_valbuf, d_locbuf);
+    TEST_CYCLE()
+    {
+        cv::gpu::minMaxLoc(d_src, &lowest, &highest, &lowestAt, &highestAt, cv::gpu::GpuMat(), d_valbuf, d_locbuf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, MinMaxLoc,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// CountNonZero
+
+GPU_PERF_TEST(CountNonZero, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    // Times cv::gpu::countNonZero with a caller-supplied buffer for each depth.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    // Host input prepared by fill() with the arguments 0.0 and 1.5.
+    cv::Mat h_src(matSize, matDepth);
+    fill(h_src, 0.0, 1.5);
+    // Device input, the result slot and the buffer passed on every call.
+    cv::gpu::GpuMat d_src(h_src);
+    int count;
+    cv::gpu::GpuMat d_buf;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    count = cv::gpu::countNonZero(d_src, d_buf);
+    TEST_CYCLE()
+    {
+        count = cv::gpu::countNonZero(d_src, d_buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, CountNonZero,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Reduce
+
+CV_ENUM(ReduceCode, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
+#define ALL_REDUCE_CODES testing::Values<ReduceCode>(CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
+
+enum {Rows = 0, Cols = 1};
+CV_ENUM(ReduceDim, Rows, Cols)
+
+GPU_PERF_TEST(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, ReduceCode, ReduceDim)
+{
+    // Times cv::gpu::reduce for each depth, channel count, reduce code and dimension.
+    const cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+
+    const cv::Size matSize = GET_PARAM(1);
+    const int matDepth = GET_PARAM(2);
+    const int cn = GET_PARAM(3);
+    const int reduceCode = GET_PARAM(4);
+    const int reduceDim = GET_PARAM(5);
+    // Combine depth and channel count into the full matrix type.
+    const int matType = CV_MAKE_TYPE(matDepth, cn);
+    // Host input prepared by fill() with the arguments 0.0 and 10.0.
+    cv::Mat h_src(matSize, matType);
+    fill(h_src, 0.0, 10.0);
+    // Device matrix built from the host input, plus an empty output.
+    cv::gpu::GpuMat d_src(h_src);
+    cv::gpu::GpuMat d_dst;
+    // Call once outside TEST_CYCLE first (presumably a warm-up).
+    cv::gpu::reduce(d_src, d_dst, reduceDim, reduceCode);
+    TEST_CYCLE()
+    {
+        cv::gpu::reduce(d_src, d_dst, reduceDim, reduceCode);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Reduce,
+    testing::Combine(ALL_DEVICES,
+                     GPU_TYPICAL_MAT_SIZES,
+                     testing::Values<MatDepth>(CV_8U, CV_16U, CV_16S, CV_32F),
+                     testing::Values<Channels>(1, 2, 3, 4),
+                     ALL_REDUCE_CODES,
+                     testing::Values(ReduceDim(Rows), ReduceDim(Cols))));
+
+#endif
diff --git a/modules/gpu/perf/perf_features2d.cpp b/modules/gpu/perf/perf_features2d.cpp
index 18e1497871..8a81860176 100644
--- a/modules/gpu/perf/perf_features2d.cpp
+++ b/modules/gpu/perf/perf_features2d.cpp
@@ -3,185 +3,207 @@
 #ifdef HAVE_CUDA
 
 //////////////////////////////////////////////////////////////////////
-// BruteForceMatcher_match
+// SURF
 
-GPU_PERF_TEST(BruteForceMatcher_match, cv::gpu::DeviceInfo, int)
+GPU_PERF_TEST_1(SURF, cv::gpu::DeviceInfo)
 {
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    int desc_size = GET_PARAM(1);
-
+    cv::gpu::DeviceInfo devInfo = GetParam();
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat query_host(3000, desc_size, CV_32FC1);
-    cv::Mat train_host(3000, desc_size, CV_32FC1);
+    cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_host.empty());
 
-    declare.in(query_host, train_host, WARMUP_RNG);
+    cv::gpu::SURF_GPU surf;
 
-    cv::gpu::GpuMat query(query_host);
-    cv::gpu::GpuMat train(train_host);
-    cv::gpu::GpuMat trainIdx, distance;
+    cv::gpu::GpuMat img(img_host);
+    cv::gpu::GpuMat keypoints, descriptors;
 
-    cv::gpu::BFMatcher_GPU matcher(cv::NORM_L2);
+    surf(img, cv::gpu::GpuMat(), keypoints, descriptors);
 
-    declare.time(3.0);
+    declare.time(2.0);
 
     TEST_CYCLE()
     {
-        matcher.matchSingle(query, train, trainIdx, distance);
+        surf(img, cv::gpu::GpuMat(), keypoints, descriptors);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_match, testing::Combine(
-                        ALL_DEVICES, 
-                        testing::Values(64, 128, 256)));
+INSTANTIATE_TEST_CASE_P(Features2D, SURF, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// BruteForceMatcher_knnMatch
+// FAST
 
-GPU_PERF_TEST(BruteForceMatcher_knnMatch, cv::gpu::DeviceInfo, int, int)
+GPU_PERF_TEST_1(FAST, cv::gpu::DeviceInfo)
 {
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    int desc_size = GET_PARAM(1);
-    int k = GET_PARAM(2);
-
+    cv::gpu::DeviceInfo devInfo = GetParam();
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat query_host(3000, desc_size, CV_32FC1);
-    cv::Mat train_host(3000, desc_size, CV_32FC1);
+    cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_host.empty());
 
-    declare.in(query_host, train_host, WARMUP_RNG);
+    cv::gpu::FAST_GPU fast(20);
 
-    cv::gpu::GpuMat query(query_host);
-    cv::gpu::GpuMat train(train_host);
-    cv::gpu::GpuMat trainIdx, distance, allDist;
-
-    cv::gpu::BFMatcher_GPU matcher(cv::NORM_L2);
+    cv::gpu::GpuMat img(img_host);
+    cv::gpu::GpuMat keypoints;
 
-    declare.time(3.0);
+    fast(img, cv::gpu::GpuMat(), keypoints);
 
     TEST_CYCLE()
     {
-        matcher.knnMatchSingle(query, train, trainIdx, distance, allDist, k);
+        fast(img, cv::gpu::GpuMat(), keypoints);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_knnMatch, testing::Combine(
-                        ALL_DEVICES, 
-                        testing::Values(64, 128, 256),
-                        testing::Values(2, 3)));
+INSTANTIATE_TEST_CASE_P(Features2D, FAST, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// BruteForceMatcher_radiusMatch
+// ORB
 
-GPU_PERF_TEST(BruteForceMatcher_radiusMatch, cv::gpu::DeviceInfo, int)
+GPU_PERF_TEST_1(ORB, cv::gpu::DeviceInfo)
 {
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    int desc_size = GET_PARAM(1);
-
+    cv::gpu::DeviceInfo devInfo = GetParam();
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat query_host(3000, desc_size, CV_32FC1);
-    cv::Mat train_host(3000, desc_size, CV_32FC1);
-
-    fill(query_host, 0, 1);
-    fill(train_host, 0, 1);
-
-    cv::gpu::GpuMat query(query_host);
-    cv::gpu::GpuMat train(train_host);
-    cv::gpu::GpuMat trainIdx, nMatches, distance;
+    cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_host.empty());
 
-    cv::gpu::BFMatcher_GPU matcher(cv::NORM_L2);
+    cv::gpu::ORB_GPU orb(4000);
 
-    declare.time(3.0);
+    cv::gpu::GpuMat img(img_host);
+    cv::gpu::GpuMat keypoints, descriptors;
 
     TEST_CYCLE()
     {
-        matcher.radiusMatchSingle(query, train, trainIdx, distance, nMatches, 2.0);
+        orb(img, cv::gpu::GpuMat(), keypoints, descriptors);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_radiusMatch, testing::Combine(
-                        ALL_DEVICES, 
-                        testing::Values(64, 128, 256)));
+INSTANTIATE_TEST_CASE_P(Features2D, ORB, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// SURF
+// BruteForceMatcher_match
 
-GPU_PERF_TEST_1(SURF, cv::gpu::DeviceInfo)
-{
-    cv::gpu::DeviceInfo devInfo = GetParam();
+IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
 
+GPU_PERF_TEST(BruteForceMatcher_match, cv::gpu::DeviceInfo, DescriptorSize, NormType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    int desc_size = GET_PARAM(1);
+    int normType = GET_PARAM(2);
 
-    ASSERT_FALSE(img_host.empty());
+    int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
 
-    cv::gpu::GpuMat img(img_host);
-    cv::gpu::GpuMat keypoints, descriptors;
+    cv::Mat query_host(3000, desc_size, type);
+    fill(query_host, 0.0, 10.0);
 
-    cv::gpu::SURF_GPU surf;
+    cv::Mat train_host(3000, desc_size, type);
+    fill(train_host, 0.0, 10.0);
 
-    declare.time(2.0);
+    cv::gpu::BFMatcher_GPU matcher(normType);
+
+    cv::gpu::GpuMat query(query_host);
+    cv::gpu::GpuMat train(train_host);
+    cv::gpu::GpuMat trainIdx, distance;
+
+    matcher.matchSingle(query, train, trainIdx, distance);
+
+    declare.time(3.0);
 
     TEST_CYCLE()
     {
-        surf(img, cv::gpu::GpuMat(), keypoints, descriptors);
+        matcher.matchSingle(query, train, trainIdx, distance);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, SURF, DEVICES(cv::gpu::GLOBAL_ATOMICS));
+INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_match, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
+    testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
 
 //////////////////////////////////////////////////////////////////////
-// FAST
+// BruteForceMatcher_knnMatch
 
-GPU_PERF_TEST_1(FAST, cv::gpu::DeviceInfo)
-{
-    cv::gpu::DeviceInfo devInfo = GetParam();
+IMPLEMENT_PARAM_CLASS(K, int)
 
+GPU_PERF_TEST(BruteForceMatcher_knnMatch, cv::gpu::DeviceInfo, DescriptorSize, K, NormType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    int desc_size = GET_PARAM(1);
+    int k = GET_PARAM(2);
+    int normType = GET_PARAM(3);
 
-    ASSERT_FALSE(img_host.empty());
+    int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
 
-    cv::gpu::GpuMat img(img_host);
-    cv::gpu::GpuMat keypoints, descriptors;
+    cv::Mat query_host(3000, desc_size, type);
+    fill(query_host, 0.0, 10.0);
 
-    cv::gpu::FAST_GPU fastGPU(20);
+    cv::Mat train_host(3000, desc_size, type);
+    fill(train_host, 0.0, 10.0);
+
+    cv::gpu::BFMatcher_GPU matcher(normType);
+
+    cv::gpu::GpuMat query(query_host);
+    cv::gpu::GpuMat train(train_host);
+    cv::gpu::GpuMat trainIdx, distance, allDist;
+
+    matcher.knnMatchSingle(query, train, trainIdx, distance, allDist, k);
+
+    declare.time(3.0);
 
     TEST_CYCLE()
     {
-        fastGPU(img, cv::gpu::GpuMat(), keypoints);
+        matcher.knnMatchSingle(query, train, trainIdx, distance, allDist, k);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, FAST, DEVICES(cv::gpu::GLOBAL_ATOMICS));
+INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_knnMatch, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
+    testing::Values(K(2), K(3)),
+    testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
 
 //////////////////////////////////////////////////////////////////////
-// ORB
+// BruteForceMatcher_radiusMatch
 
-GPU_PERF_TEST_1(ORB, cv::gpu::DeviceInfo)
+GPU_PERF_TEST(BruteForceMatcher_radiusMatch, cv::gpu::DeviceInfo, DescriptorSize, NormType)
 {
-    cv::gpu::DeviceInfo devInfo = GetParam();
-
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    int desc_size = GET_PARAM(1);
+    int normType = GET_PARAM(2);
 
-    ASSERT_FALSE(img_host.empty());
+    int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F;
 
-    cv::gpu::GpuMat img(img_host);
-    cv::gpu::GpuMat keypoints, descriptors;
+    cv::Mat query_host(3000, desc_size, type);
+    fill(query_host, 0.0, 1.0);
+
+    cv::Mat train_host(3000, desc_size, type);
+    fill(train_host, 0.0, 1.0);
 
-    cv::gpu::ORB_GPU orbGPU(4000);
+    cv::gpu::BFMatcher_GPU matcher(normType);
+
+    cv::gpu::GpuMat query(query_host);
+    cv::gpu::GpuMat train(train_host);
+    cv::gpu::GpuMat trainIdx, nMatches, distance;
+
+    matcher.radiusMatchSingle(query, train, trainIdx, distance, nMatches, 2.0);
+
+    declare.time(3.0);
 
     TEST_CYCLE()
     {
-        orbGPU(img, cv::gpu::GpuMat(), keypoints, descriptors);
+        matcher.radiusMatchSingle(query, train, trainIdx, distance, nMatches, 2.0);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, ORB, DEVICES(cv::gpu::GLOBAL_ATOMICS));
+INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_radiusMatch, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
+    testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
 
 #endif
diff --git a/modules/gpu/perf/perf_filters.cpp b/modules/gpu/perf/perf_filters.cpp
index 4975033239..0263e8063f 100644
--- a/modules/gpu/perf/perf_filters.cpp
+++ b/modules/gpu/perf/perf_filters.cpp
@@ -2,11 +2,11 @@
 
 #ifdef HAVE_CUDA
 
-IMPLEMENT_PARAM_CLASS(KernelSize, int)
-
 //////////////////////////////////////////////////////////////////////
 // Blur
 
+IMPLEMENT_PARAM_CLASS(KernelSize, int)
+
 GPU_PERF_TEST(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
@@ -17,19 +17,20 @@ GPU_PERF_TEST(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     int ksize = GET_PARAM(3);
 
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0.0, 255.0);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::blur(src, dst, cv::Size(ksize, ksize));
+
     TEST_CYCLE()
     {
         cv::gpu::blur(src, dst, cv::Size(ksize, ksize));
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Blur, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Blur, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
@@ -48,20 +49,21 @@ GPU_PERF_TEST(Sobel, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     int ksize = GET_PARAM(3);
 
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0.0, 255.0);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
     cv::gpu::GpuMat buf;
 
+    cv::gpu::Sobel(src, dst, -1, 1, 1, buf, ksize);
+
     TEST_CYCLE()
     {
         cv::gpu::Sobel(src, dst, -1, 1, 1, buf, ksize);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Sobel, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Sobel, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
@@ -79,20 +81,21 @@ GPU_PERF_TEST(Scharr, cv::gpu::DeviceInfo, cv::Size, MatType)
     int type = GET_PARAM(2);
 
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0.0, 255.0);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
     cv::gpu::GpuMat buf;
 
+    cv::gpu::Scharr(src, dst, -1, 1, 0, buf);
+
     TEST_CYCLE()
     {
         cv::gpu::Scharr(src, dst, -1, 1, 0, buf);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Scharr, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Scharr, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1))));
@@ -110,20 +113,21 @@ GPU_PERF_TEST(GaussianBlur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     int ksize = GET_PARAM(3);
 
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0.0, 255.0);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
     cv::gpu::GpuMat buf;
 
+    cv::gpu::GaussianBlur(src, dst, cv::Size(ksize, ksize), buf, 0.5);
+
     TEST_CYCLE()
     {
         cv::gpu::GaussianBlur(src, dst, cv::Size(ksize, ksize), buf, 0.5);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, GaussianBlur, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
@@ -142,19 +146,20 @@ GPU_PERF_TEST(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     int ksize = GET_PARAM(3);
 
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0.0, 255.0);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::Laplacian(src, dst, -1, ksize);
+
     TEST_CYCLE()
     {
         cv::gpu::Laplacian(src, dst, -1, ksize);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Laplacian, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Laplacian, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
@@ -172,8 +177,7 @@ GPU_PERF_TEST(Erode, cv::gpu::DeviceInfo, cv::Size, MatType)
     int type = GET_PARAM(2);
 
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0.0, 255.0);
 
     cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
 
@@ -181,13 +185,15 @@ GPU_PERF_TEST(Erode, cv::gpu::DeviceInfo, cv::Size, MatType)
     cv::gpu::GpuMat dst;
     cv::gpu::GpuMat buf;
 
+    cv::gpu::erode(src, dst, ker, buf);
+
     TEST_CYCLE()
     {
         cv::gpu::erode(src, dst, ker, buf);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Erode, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Erode, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
@@ -204,8 +210,7 @@ GPU_PERF_TEST(Dilate, cv::gpu::DeviceInfo, cv::Size, MatType)
     int type = GET_PARAM(2);
 
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0.0, 255.0);
 
     cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
 
@@ -213,13 +218,15 @@ GPU_PERF_TEST(Dilate, cv::gpu::DeviceInfo, cv::Size, MatType)
     cv::gpu::GpuMat dst;
     cv::gpu::GpuMat buf;
 
+    cv::gpu::dilate(src, dst, ker, buf);
+
     TEST_CYCLE()
     {
         cv::gpu::dilate(src, dst, ker, buf);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Dilate, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Dilate, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
@@ -240,8 +247,7 @@ GPU_PERF_TEST(MorphologyEx, cv::gpu::DeviceInfo, cv::Size, MatType, MorphOp)
     int morphOp = GET_PARAM(3);
 
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0.0, 255.0);
 
     cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
 
@@ -250,13 +256,15 @@ GPU_PERF_TEST(MorphologyEx, cv::gpu::DeviceInfo, cv::Size, MatType, MorphOp)
     cv::gpu::GpuMat buf1;
     cv::gpu::GpuMat buf2;
 
+    cv::gpu::morphologyEx(src, dst, morphOp, ker, buf1, buf2);
+
     TEST_CYCLE()
     {
         cv::gpu::morphologyEx(src, dst, morphOp, ker, buf1, buf2);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, MorphologyEx, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
@@ -275,20 +283,23 @@ GPU_PERF_TEST(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     int ksize = GET_PARAM(3);
 
     cv::Mat src_host(size, type);
-    cv::Mat kernel(ksize, ksize, CV_32FC1);
+    fill(src_host, 0.0, 255.0);
 
-    declare.in(src_host, kernel, WARMUP_RNG);
+    cv::Mat kernel(ksize, ksize, CV_32FC1);
+    fill(kernel, 0.0, 1.0);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::filter2D(src, dst, -1, kernel);
+
     TEST_CYCLE()
     {
         cv::gpu::filter2D(src, dst, -1, kernel);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Filter2D, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp
index 5ef5fe80d1..42c5eb2007 100644
--- a/modules/gpu/perf/perf_imgproc.cpp
+++ b/modules/gpu/perf/perf_imgproc.cpp
@@ -5,27 +5,32 @@
 //////////////////////////////////////////////////////////////////////
 // Remap
 
-GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation, BorderMode)
+GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
     int interpolation = GET_PARAM(3);
     int borderMode = GET_PARAM(4);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::Mat src_host(size, type);
+    fill(src_host, 0, 255);
+
     cv::Mat xmap_host(size, CV_32FC1);
-    cv::Mat ymap_host(size, CV_32FC1);
+    fill(xmap_host, 0, size.width);
 
-    declare.in(src_host, xmap_host, ymap_host, WARMUP_RNG);
+    cv::Mat ymap_host(size, CV_32FC1);
+    fill(ymap_host, 0, size.height);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat xmap(xmap_host);
     cv::gpu::GpuMat ymap(ymap_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::remap(src, dst, xmap, ymap, interpolation, borderMode);
+
     declare.time(3.0);
 
     TEST_CYCLE()
@@ -35,578 +40,814 @@ GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, Remap, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32FC1),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC),
-                        testing::Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_CONSTANT)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
+
 
 //////////////////////////////////////////////////////////////////////
-// MeanShiftFiltering
+// Resize
 
-GPU_PERF_TEST_1(MeanShiftFiltering, cv::gpu::DeviceInfo)
-{
-    cv::gpu::DeviceInfo devInfo = GetParam();
+IMPLEMENT_PARAM_CLASS(Scale, double)
 
+GPU_PERF_TEST(Resize, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, Scale)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img = readImage("gpu/meanshift/cones.png");
-    ASSERT_FALSE(img.empty());
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int interpolation = GET_PARAM(3);
+    double f = GET_PARAM(4);
 
-    cv::Mat rgba;
-    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
+    cv::Mat src_host(size, type);
+    fill(src_host, 0, 255);
 
-    cv::gpu::GpuMat src(rgba);
+    cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
-    declare.time(5.0);
+    cv::gpu::resize(src, dst, cv::Size(), f, f, interpolation);
+
+    declare.time(1.0);
 
     TEST_CYCLE()
     {
-        cv::gpu::meanShiftFiltering(src, dst, 50, 50);
+        cv::gpu::resize(src, dst, cv::Size(), f, f, interpolation);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftFiltering, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+    testing::Values(Scale(0.5), Scale(2.0))));
 
 //////////////////////////////////////////////////////////////////////
-// MeanShiftProc
+// WarpAffine
 
-GPU_PERF_TEST_1(MeanShiftProc, cv::gpu::DeviceInfo)
+GPU_PERF_TEST(WarpAffine, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
 {
-    cv::gpu::DeviceInfo devInfo = GetParam();
-
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img = readImage("gpu/meanshift/cones.png");
-    ASSERT_FALSE(img.empty());
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int interpolation = GET_PARAM(3);
+    int borderMode = GET_PARAM(4);
 
-    cv::Mat rgba;
-    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
+    cv::Mat src_host(size, type);
+    fill(src_host, 0, 255);
 
-    cv::gpu::GpuMat src(rgba);
-    cv::gpu::GpuMat dstr;
-    cv::gpu::GpuMat dstsp;
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
 
-    declare.time(5.0);
+    const double alpha = CV_PI / 4; // rotation angle of the affine transform, in radians
+    double mat[2][3] = { {std::cos(alpha), -std::sin(alpha), src.cols / 2.0}, // 2.0: floating-point division, no int->double narrowing inside the braces
+                         {std::sin(alpha),  std::cos(alpha), 0.0}};
+    cv::Mat M(2, 3, CV_64F, (void*) mat); // header over the local array; 'mat' must outlive M
+
+    cv::gpu::warpAffine(src, dst, M, size, interpolation, borderMode);
 
     TEST_CYCLE()
     {
-        cv::gpu::meanShiftProc(src, dstr, dstsp, 50, 50);
+        cv::gpu::warpAffine(src, dst, M, size, interpolation, borderMode);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftProc, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(ImgProc, WarpAffine, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
 
 //////////////////////////////////////////////////////////////////////
-// MeanShiftSegmentation
+// WarpPerspective
 
-GPU_PERF_TEST_1(MeanShiftSegmentation, cv::gpu::DeviceInfo)
+GPU_PERF_TEST(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
 {
-    cv::gpu::DeviceInfo devInfo = GetParam();
-
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img = readImage("gpu/meanshift/cones.png");
-    ASSERT_FALSE(img.empty());
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int interpolation = GET_PARAM(3);
+    int borderMode = GET_PARAM(4);
 
-    cv::Mat rgba;
-    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
+    cv::Mat src_host(size, type);
+    fill(src_host, 0, 255);
 
-    cv::gpu::GpuMat src(rgba);
-    cv::Mat dst;
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
 
-    declare.time(5.0);
+    const double alpha = CV_PI / 4; // rotation angle of the transform, in radians
+    double mat[3][3] = { {std::cos(alpha), -std::sin(alpha), src.cols / 2.0}, // 2.0: floating-point division, no int->double narrowing inside the braces
+                         {std::sin(alpha),  std::cos(alpha), 0.0},
+                         {0.0,              0.0,             1.0}};
+    cv::Mat M(3, 3, CV_64F, (void*) mat); // header over the local array; 'mat' must outlive M
+
+    cv::gpu::warpPerspective(src, dst, M, size, interpolation, borderMode);
 
     TEST_CYCLE()
     {
-        meanShiftSegmentation(src, dst, 10, 10, 20);
+        cv::gpu::warpPerspective(src, dst, M, size, interpolation, borderMode);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftSegmentation, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(ImgProc, WarpPerspective, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
 
 //////////////////////////////////////////////////////////////////////
-// DrawColorDisp
+// CopyMakeBorder
 
-GPU_PERF_TEST(DrawColorDisp, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, MatType, BorderMode)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
+    int borderType = GET_PARAM(3);
 
     cv::Mat src_host(size, type);
-
     fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::copyMakeBorder(src, dst, 5, 5, 5, 5, borderType);
+
     TEST_CYCLE()
     {
-        cv::gpu::drawColorDisp(src, dst, 255);
+        cv::gpu::copyMakeBorder(src, dst, 5, 5, 5, 5, borderType);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, DrawColorDisp, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16SC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
 
 //////////////////////////////////////////////////////////////////////
-// ReprojectImageTo3D
+// Threshold
+
+CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
+#define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))
 
-GPU_PERF_TEST(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(Threshold, cv::gpu::DeviceInfo, cv::Size, MatDepth, ThreshOp)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, type);
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    int threshOp = GET_PARAM(3);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat src_host(size, depth);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::threshold(src, dst, 100.0, 255.0, threshOp);
+
     TEST_CYCLE()
     {
-        cv::gpu::reprojectImageTo3D(src, dst, cv::Mat::ones(4, 4, CV_32FC1));
+        cv::gpu::threshold(src, dst, 100.0, 255.0, threshOp);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, ReprojectImageTo3D, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16SC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, Threshold, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F)),
+    ALL_THRESH_OPS));
 
 //////////////////////////////////////////////////////////////////////
-// CvtColor
+// Integral
 
-GPU_PERF_TEST(CvtColor, cv::gpu::DeviceInfo, cv::Size, perf::MatType, CvtColorInfo)
+GPU_PERF_TEST(Integral, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    CvtColorInfo info = GET_PARAM(3);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, CV_MAKETYPE(type, info.scn));
+    cv::Size size = GET_PARAM(1);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat src_host(size, CV_8UC1);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
+    cv::gpu::GpuMat buf;
+
+    cv::gpu::integralBuffered(src, dst, buf);
 
     TEST_CYCLE()
     {
-        cv::gpu::cvtColor(src, dst, info.code, info.dcn);
+        cv::gpu::integralBuffered(src, dst, buf);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1),
-                        testing::Values(
-                            CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA), CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY), CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
-                            CvtColorInfo(4, 4, cv::COLOR_BGR2XYZ), CvtColorInfo(4, 4, cv::COLOR_BGR2YCrCb), CvtColorInfo(4, 4, cv::COLOR_YCrCb2BGR),
-                            CvtColorInfo(4, 4, cv::COLOR_BGR2HSV), CvtColorInfo(4, 4, cv::COLOR_HSV2BGR))));
+INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
-// SwapChannels
+// Integral_Sqr
 
-GPU_PERF_TEST(SwapChannels, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(Integral_Sqr, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, CV_8UC4);
+    cv::Size size = GET_PARAM(1);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat src_host(size, CV_8UC1);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
 
-    const int dstOrder[] = {2, 1, 0, 3};
+    cv::gpu::sqrIntegral(src, dst);
 
     TEST_CYCLE()
     {
-        cv::gpu::swapChannels(src, dstOrder);
+        cv::gpu::sqrIntegral(src, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, SwapChannels, testing::Combine(ALL_DEVICES, GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, Integral_Sqr, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
-// Threshold
+// HistEven_OneChannel
 
-GPU_PERF_TEST(Threshold, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(HistEven_OneChannel, cv::gpu::DeviceInfo, cv::Size, MatDepth)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, type);
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat src_host(size, depth);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst(size, type);
+    cv::gpu::GpuMat hist;
+    cv::gpu::GpuMat buf;
+
+    cv::gpu::histEven(src, hist, buf, 30, 0, 180);
 
     TEST_CYCLE()
     {
-        cv::gpu::threshold(src, dst, 100.0, 255.0, cv::THRESH_BINARY);
+        cv::gpu::histEven(src, hist, buf, 30, 0, 180);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Threshold, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, HistEven_OneChannel, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S))));
 
 //////////////////////////////////////////////////////////////////////
-// Resize
+// HistEven_FourChannel
 
-GPU_PERF_TEST(Resize, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation, double)
+GPU_PERF_TEST(HistEven_FourChannel, cv::gpu::DeviceInfo, cv::Size, MatDepth)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-    double f = GET_PARAM(4);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, type);
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat src_host(size, CV_MAKE_TYPE(depth, 4));
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
+    cv::gpu::GpuMat hist[4];
+    cv::gpu::GpuMat buf;
+    int histSize[] = {30, 30, 30, 30};
+    int lowerLevel[] = {0, 0, 0, 0};
+    int upperLevel[] = {180, 180, 180, 180};
 
-    declare.time(1.0);
+    cv::gpu::histEven(src, hist, buf, histSize, lowerLevel, upperLevel);
 
     TEST_CYCLE()
     {
-        cv::gpu::resize(src, dst, cv::Size(), f, f, interpolation);
+        cv::gpu::histEven(src, hist, buf, histSize, lowerLevel, upperLevel);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(perf::szSXGA, perf::sz1080p),
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32FC1),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC),
-                        testing::Values(0.5, 2.0)));
+INSTANTIATE_TEST_CASE_P(ImgProc, HistEven_FourChannel, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S))));
 
 //////////////////////////////////////////////////////////////////////
-// WarpAffine
+// CalcHist
 
-GPU_PERF_TEST(WarpAffine, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation)
+GPU_PERF_TEST(CalcHist, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, type);
+    cv::Size size = GET_PARAM(1);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat src_host(size, CV_8UC1);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
+    cv::gpu::GpuMat hist;
+    cv::gpu::GpuMat buf;
 
-    const double aplha = CV_PI / 4;
-    double mat[2][3] = { {std::cos(aplha), -std::sin(aplha), src.cols / 2},
-                         {std::sin(aplha),  std::cos(aplha), 0}};
-    cv::Mat M(2, 3, CV_64F, (void*) mat);
+    cv::gpu::calcHist(src, hist, buf);
 
     TEST_CYCLE()
     {
-        cv::gpu::warpAffine(src, dst, M, size, interpolation, cv::BORDER_CONSTANT, cv::Scalar());
+        cv::gpu::calcHist(src, hist, buf);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, WarpAffine, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
+INSTANTIATE_TEST_CASE_P(ImgProc, CalcHist, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
-// WarpPerspective
+// EqualizeHist
 
-GPU_PERF_TEST(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation)
+GPU_PERF_TEST(EqualizeHist, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, type);
+    cv::Size size = GET_PARAM(1);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat src_host(size, CV_8UC1);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
+    cv::gpu::GpuMat hist;
+    cv::gpu::GpuMat buf;
 
-    const double aplha = CV_PI / 4;
-    double mat[3][3] = { {std::cos(aplha), -std::sin(aplha), src.cols / 2},
-                         {std::sin(aplha),  std::cos(aplha), 0},
-                         {0.0,              0.0,             1.0}};
-    cv::Mat M(3, 3, CV_64F, (void*) mat);
+    cv::gpu::equalizeHist(src, dst, hist, buf);
 
     TEST_CYCLE()
     {
-        cv::gpu::warpPerspective(src, dst, M, size, interpolation, cv::BORDER_CONSTANT, cv::Scalar());
+        cv::gpu::equalizeHist(src, dst, hist, buf);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, WarpPerspective, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
+INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
-// BuildWarpPlaneMaps
+// ColumnSum
 
-GPU_PERF_TEST(BuildWarpPlaneMaps, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(ColumnSum, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
 
-    cv::gpu::setDevice(devInfo.deviceID());
+    cv::Mat src_host(size, CV_32FC1);
+    fill(src_host, 0, 255);
 
-    cv::gpu::GpuMat map_x;
-    cv::gpu::GpuMat map_y;
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::columnSum(src, dst);
 
     TEST_CYCLE()
     {
-        cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), cv::Mat::eye(3, 3, CV_32FC1),
-                                    cv::Mat::ones(3, 3, CV_32FC1), cv::Mat::zeros(1, 3, CV_32F), 1.0, map_x, map_y);
+        cv::gpu::columnSum(src, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpPlaneMaps, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, ColumnSum, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
-// BuildWarpCylindricalMaps
+// Canny
 
-GPU_PERF_TEST(BuildWarpCylindricalMaps, cv::gpu::DeviceInfo, cv::Size)
+IMPLEMENT_PARAM_CLASS(AppertureSize, int)
+IMPLEMENT_PARAM_CLASS(L2gradient, bool)
+
+GPU_PERF_TEST(Canny, cv::gpu::DeviceInfo, AppertureSize, L2gradient)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    int apperture_size = GET_PARAM(1);
+    bool useL2gradient = GET_PARAM(2);
+
+    cv::Mat image_host = readImage("perf/1280x1024.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image_host.empty());
+
+    cv::gpu::GpuMat image(image_host);
+    cv::gpu::GpuMat dst;
+    cv::gpu::CannyBuf buf;
+
+    cv::gpu::Canny(image, buf, dst, 50.0, 100.0, apperture_size, useL2gradient);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::Canny(image, buf, dst, 50.0, 100.0, apperture_size, useL2gradient);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(AppertureSize(3), AppertureSize(5)),
+    testing::Values(L2gradient(false), L2gradient(true))));
 
+//////////////////////////////////////////////////////////////////////
+// MeanShiftFiltering
+
+GPU_PERF_TEST_1(MeanShiftFiltering, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::gpu::GpuMat map_x;
-    cv::gpu::GpuMat map_y;
+    cv::Mat img = readImage("gpu/meanshift/cones.png");
+    ASSERT_FALSE(img.empty());
+
+    cv::Mat rgba;
+    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
+
+    cv::gpu::GpuMat src(rgba);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::meanShiftFiltering(src, dst, 50, 50);
+
+    declare.time(5.0);
 
     TEST_CYCLE()
     {
-        cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), cv::Mat::eye(3, 3, CV_32FC1),
-                                          cv::Mat::ones(3, 3, CV_32FC1), 1.0, map_x, map_y);
+        cv::gpu::meanShiftFiltering(src, dst, 50, 50);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpCylindricalMaps, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftFiltering, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// BuildWarpSphericalMaps
+// MeanShiftProc
 
-GPU_PERF_TEST(BuildWarpSphericalMaps, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST_1(MeanShiftProc, cv::gpu::DeviceInfo)
 {
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
+    cv::gpu::DeviceInfo devInfo = GetParam();
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat img = readImage("gpu/meanshift/cones.png");
+    ASSERT_FALSE(img.empty());
+
+    cv::Mat rgba;
+    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
+
+    cv::gpu::GpuMat src(rgba);
+    cv::gpu::GpuMat dstr;
+    cv::gpu::GpuMat dstsp;
+
+    cv::gpu::meanShiftProc(src, dstr, dstsp, 50, 50);
+
+    declare.time(5.0);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::meanShiftProc(src, dstr, dstsp, 50, 50);
+    }
+}
 
+INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftProc, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////
+// MeanShiftSegmentation
+
+GPU_PERF_TEST_1(MeanShiftSegmentation, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::gpu::GpuMat map_x;
-    cv::gpu::GpuMat map_y;
+    cv::Mat img = readImage("gpu/meanshift/cones.png");
+    ASSERT_FALSE(img.empty());
+
+    cv::Mat rgba;
+    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
+
+    cv::gpu::GpuMat src(rgba);
+    cv::Mat dst;
+
+    cv::gpu::meanShiftSegmentation(src, dst, 10, 10, 20);
+
+    declare.time(5.0);
 
     TEST_CYCLE()
     {
-        cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), cv::Mat::eye(3, 3, CV_32FC1),
-                                        cv::Mat::ones(3, 3, CV_32FC1), 1.0, map_x, map_y);
+        cv::gpu::meanShiftSegmentation(src, dst, 10, 10, 20);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpSphericalMaps, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftSegmentation, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// Rotate
+// BlendLinear
 
-GPU_PERF_TEST(Rotate, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation)
+GPU_PERF_TEST(BlendLinear, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-
-    cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, type);
+    cv::Mat img1_host(size, type);
+    fill(img1_host, 0, 255);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat img2_host(size, type);
+    fill(img2_host, 0, 255);
 
-    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat img1(img1_host);
+    cv::gpu::GpuMat img2(img2_host);
+    cv::gpu::GpuMat weights1(size, CV_32FC1, cv::Scalar::all(0.5));
+    cv::gpu::GpuMat weights2(size, CV_32FC1, cv::Scalar::all(0.5));
     cv::gpu::GpuMat dst;
 
+    cv::gpu::blendLinear(img1, img2, weights1, weights2, dst);
+
     TEST_CYCLE()
     {
-        cv::gpu::rotate(src, dst, size, 30.0, 0, 0, interpolation);
+        cv::gpu::blendLinear(img1, img2, weights1, weights2, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Rotate, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
+INSTANTIATE_TEST_CASE_P(ImgProc, BlendLinear, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
 
 //////////////////////////////////////////////////////////////////////
-// CopyMakeBorder
+// Convolve
+
+IMPLEMENT_PARAM_CLASS(KSize, int)
+IMPLEMENT_PARAM_CLASS(Ccorr, bool)
 
-GPU_PERF_TEST(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, perf::MatType, BorderMode)
+GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, KSize, Ccorr)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int borderType = GET_PARAM(3);
+    int templ_size = GET_PARAM(2);
+    bool ccorr = GET_PARAM(3);
+
+    cv::gpu::GpuMat image = cv::gpu::createContinuous(size, CV_32FC1);
+    image.setTo(cv::Scalar(1.0));
+
+    cv::gpu::GpuMat templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1);
+    templ.setTo(cv::Scalar(1.0));
+
+    cv::gpu::GpuMat dst;
+    cv::gpu::ConvolveBuf buf;
 
+    cv::gpu::convolve(image, templ, dst, ccorr, buf);
+
+    declare.time(2.0);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::convolve(image, templ, dst, ccorr, buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(KSize(3), KSize(9), KSize(17), KSize(27), KSize(32), KSize(64)),
+    testing::Values(Ccorr(false), Ccorr(true))));
+
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate_8U
+
+CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
+#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_CCOEFF_NORMED))
+
+IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size)
+
+GPU_PERF_TEST(MatchTemplate_8U, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, type);
+    cv::Size size = GET_PARAM(1);
+    cv::Size templ_size = GET_PARAM(2);
+    int cn = GET_PARAM(3);
+    int method = GET_PARAM(4);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat image_host(size, CV_MAKE_TYPE(CV_8U, cn));
+    fill(image_host, 0, 255);
 
-    cv::gpu::GpuMat src(src_host);
+    cv::Mat templ_host(templ_size, CV_MAKE_TYPE(CV_8U, cn));
+    fill(templ_host, 0, 255);
+
+    cv::gpu::GpuMat image(image_host);
+    cv::gpu::GpuMat templ(templ_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::matchTemplate(image, templ, dst, method);
+
     TEST_CYCLE()
     {
-        cv::gpu::copyMakeBorder(src, dst, 5, 5, 5, 5, borderType);
+        cv::gpu::matchTemplate(image, templ, dst, method);
     }
-}
+}
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_32FC1),
-                        testing::Values((int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT, (int) cv::BORDER_WRAP, (int) cv::BORDER_CONSTANT)));
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_8U, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
+    testing::Values(Channels(1), Channels(3), Channels(4)),
+    ALL_TEMPLATE_METHODS));
 
-//////////////////////////////////////////////////////////////////////
-// Integral
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate_32F
 
-GPU_PERF_TEST(Integral, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(MatchTemplate_32F, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
+    cv::Size templ_size = GET_PARAM(2);
+    int cn = GET_PARAM(3);
+    int method = GET_PARAM(4);
+
+    cv::Mat image_host(size, CV_MAKE_TYPE(CV_32F, cn));
+    fill(image_host, 0, 255);
+
+    cv::Mat templ_host(templ_size, CV_MAKE_TYPE(CV_32F, cn));
+    fill(templ_host, 0, 255);
+
+    cv::gpu::GpuMat image(image_host);
+    cv::gpu::GpuMat templ(templ_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::matchTemplate(image, templ, dst, method);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::matchTemplate(image, templ, dst, method);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_32F, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
+    testing::Values(Channels(1), Channels(3), Channels(4)),
+    testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
 
+//////////////////////////////////////////////////////////////////////
+// MulSpectrums
+
+CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
+
+GPU_PERF_TEST(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, CV_8UC1);
+    cv::Size size = GET_PARAM(1);
+    int flag = GET_PARAM(2);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat a_host(size, CV_32FC2);
+    fill(a_host, 0, 100);
 
-    cv::gpu::GpuMat src(src_host);
+    cv::Mat b_host(size, CV_32FC2);
+    fill(b_host, 0, 100);
+
+    cv::gpu::GpuMat a(a_host);
+    cv::gpu::GpuMat b(b_host);
     cv::gpu::GpuMat dst;
-    cv::gpu::GpuMat buf;
+
+    cv::gpu::mulSpectrums(a, b, dst, flag);
 
     TEST_CYCLE()
     {
-        cv::gpu::integralBuffered(src, dst, buf);
+        cv::gpu::mulSpectrums(a, b, dst, flag);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, MulSpectrums, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
 
 //////////////////////////////////////////////////////////////////////
-// IntegralSqr
+// MulAndScaleSpectrums
 
-GPU_PERF_TEST(IntegralSqr, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(MulAndScaleSpectrums, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
 
-    cv::gpu::setDevice(devInfo.deviceID());
+    float scale = 1.f / size.area();
 
-    cv::Mat src_host(size, CV_8UC1);
+    cv::Mat src1_host(size, CV_32FC2);
+    fill(src1_host, 0, 100);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat src2_host(size, CV_32FC2);
+    fill(src2_host, 0, 100);
 
-    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::mulAndScaleSpectrums(src1, src2, dst, cv::DFT_ROWS, scale, false);
+
     TEST_CYCLE()
     {
-        cv::gpu::sqrIntegral(src, dst);
+        cv::gpu::mulAndScaleSpectrums(src1, src2, dst, cv::DFT_ROWS, scale, false);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, IntegralSqr, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, MulAndScaleSpectrums, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
-// ColumnSum
+// Dft
 
-GPU_PERF_TEST(ColumnSum, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(Dft, cv::gpu::DeviceInfo, cv::Size, DftFlags)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, CV_32FC1);
+    cv::Size size = GET_PARAM(1);
+    int flag = GET_PARAM(2);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat src_host(size, CV_32FC2);
+    fill(src_host, 0, 100);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::dft(src, dst, size, flag);
+
+    declare.time(2.0);
+
     TEST_CYCLE()
     {
-        cv::gpu::columnSum(src, dst);
+        cv::gpu::dft(src, dst, size, flag);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, ColumnSum, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, Dft, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE))));
 
 //////////////////////////////////////////////////////////////////////
 // CornerHarris
 
-GPU_PERF_TEST(CornerHarris, cv::gpu::DeviceInfo, perf::MatType)
+IMPLEMENT_PARAM_CLASS(BlockSize, int)
+IMPLEMENT_PARAM_CLASS(ApertureSize, int)
+
+GPU_PERF_TEST(CornerHarris, cv::gpu::DeviceInfo, MatType, BorderMode, BlockSize, ApertureSize)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    int type = GET_PARAM(1);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
+    int type = GET_PARAM(1);
+    int borderType = GET_PARAM(2);
+    int blockSize = GET_PARAM(3);
+    int apertureSize = GET_PARAM(4);
+
     cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
 
@@ -616,31 +857,38 @@ GPU_PERF_TEST(CornerHarris, cv::gpu::DeviceInfo, perf::MatType)
     cv::gpu::GpuMat dst;
     cv::gpu::GpuMat Dx;
     cv::gpu::GpuMat Dy;
+    cv::gpu::GpuMat buf;
 
-    int blockSize = 3;
-    int ksize = 7;
     double k = 0.5;
 
+    cv::gpu::cornerHarris(src, dst, Dx, Dy, buf, blockSize, apertureSize, k, borderType);
+
     TEST_CYCLE()
     {
-        cv::gpu::cornerHarris(src, dst, Dx, Dy, blockSize, ksize, k);
+        cv::gpu::cornerHarris(src, dst, Dx, Dy, buf, blockSize, apertureSize, k, borderType);
     }
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(CV_8UC1, CV_32FC1)));
+    ALL_DEVICES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
+    testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
+    testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
 
 //////////////////////////////////////////////////////////////////////
 // CornerMinEigenVal
 
-GPU_PERF_TEST(CornerMinEigenVal, cv::gpu::DeviceInfo, perf::MatType)
+GPU_PERF_TEST(CornerMinEigenVal, cv::gpu::DeviceInfo, MatType, BorderMode, BlockSize, ApertureSize)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    int type = GET_PARAM(1);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
+    int type = GET_PARAM(1);
+    int borderType = GET_PARAM(2);
+    int blockSize = GET_PARAM(3);
+    int apertureSize = GET_PARAM(4);
+
     cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(img.empty());
 
@@ -650,337 +898,344 @@ GPU_PERF_TEST(CornerMinEigenVal, cv::gpu::DeviceInfo, perf::MatType)
     cv::gpu::GpuMat dst;
     cv::gpu::GpuMat Dx;
     cv::gpu::GpuMat Dy;
+    cv::gpu::GpuMat buf;
 
-    int blockSize = 3;
-    int ksize = 7;
+    cv::gpu::cornerMinEigenVal(src, dst, Dx, Dy, buf, blockSize, apertureSize, borderType);
 
     TEST_CYCLE()
     {
-        cv::gpu::cornerMinEigenVal(src, dst, Dx, Dy, blockSize, ksize);
+        cv::gpu::cornerMinEigenVal(src, dst, Dx, Dy, buf, blockSize, apertureSize, borderType);
     }
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigenVal, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(CV_8UC1, CV_32FC1)));
+    ALL_DEVICES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
+    testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
+    testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
 
 //////////////////////////////////////////////////////////////////////
-// MulSpectrums
+// BuildWarpPlaneMaps
 
-GPU_PERF_TEST(MulSpectrums, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(BuildWarpPlaneMaps, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat a_host(size, CV_32FC2);
-    cv::Mat b_host(size, CV_32FC2);
+    cv::Size size = GET_PARAM(1);
 
-    declare.in(a_host, b_host, WARMUP_RNG);
+    cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
+    cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
+    cv::Mat T = cv::Mat::zeros(1, 3, CV_32F);
+    cv::gpu::GpuMat map_x;
+    cv::gpu::GpuMat map_y;
 
-    cv::gpu::GpuMat a(a_host);
-    cv::gpu::GpuMat b(b_host);
-    cv::gpu::GpuMat dst;
+    cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, map_x, map_y);
 
     TEST_CYCLE()
     {
-        cv::gpu::mulSpectrums(a, b, dst, 0);
+        cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, map_x, map_y);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MulSpectrums, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpPlaneMaps, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
-// Dft
+// BuildWarpCylindricalMaps
 
-GPU_PERF_TEST(Dft, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(BuildWarpCylindricalMaps, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, CV_32FC2);
-
-    declare.in(src_host, WARMUP_RNG);
+    cv::Size size = GET_PARAM(1);
 
-    cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat dst;
+    cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
+    cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
+    cv::gpu::GpuMat map_x;
+    cv::gpu::GpuMat map_y;
 
-    declare.time(2.0);
+    cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
 
     TEST_CYCLE()
     {
-        cv::gpu::dft(src, dst, size);
+        cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Dft, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpCylindricalMaps, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
-// Convolve
+// BuildWarpSphericalMaps
 
-GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, int, bool)
+GPU_PERF_TEST(BuildWarpSphericalMaps, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int templ_size = GET_PARAM(2);
-    bool ccorr = GET_PARAM(3);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::gpu::GpuMat image = cv::gpu::createContinuous(size, CV_32FC1);
-    cv::gpu::GpuMat templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1);
-
-    image.setTo(cv::Scalar(1.0));
-    templ.setTo(cv::Scalar(1.0));
+    cv::Size size = GET_PARAM(1);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::ConvolveBuf buf;
+    cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1);
+    cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1);
+    cv::gpu::GpuMat map_x;
+    cv::gpu::GpuMat map_y;
 
-    declare.time(2.0);
+    cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
 
     TEST_CYCLE()
     {
-        cv::gpu::convolve(image, templ, dst, ccorr, buf);
+        cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(3, 9, 27, 32, 64),
-                        testing::Bool()));
+INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpSphericalMaps, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
-// PyrDown
+// Rotate
 
-GPU_PERF_TEST(PyrDown, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(Rotate, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
+    int interpolation = GET_PARAM(3);
 
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::rotate(src, dst, size, 30.0, 0, 0, interpolation);
+
     TEST_CYCLE()
     {
-        cv::gpu::pyrDown(src, dst);
+        cv::gpu::rotate(src, dst, size, 30.0, 0, 0, interpolation);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, PyrDown, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16SC3, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, Rotate, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC))));
 
 //////////////////////////////////////////////////////////////////////
-// PyrUp
+// PyrDown
 
-GPU_PERF_TEST(PyrUp, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(PyrDown, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::pyrDown(src, dst);
+
     TEST_CYCLE()
     {
-        cv::gpu::pyrUp(src, dst);
+        cv::gpu::pyrDown(src, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16SC3, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, PyrDown, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
 
 //////////////////////////////////////////////////////////////////////
-// BlendLinear
+// PyrUp
 
-GPU_PERF_TEST(BlendLinear, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(PyrUp, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img1_host(size, type);
-    cv::Mat img2_host(size, type);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
 
-    declare.in(img1_host, img2_host, WARMUP_RNG);
+    cv::Mat src_host(size, type);
+    fill(src_host, 0, 255);
 
-    cv::gpu::GpuMat img1(img1_host);
-    cv::gpu::GpuMat img2(img2_host);
-    cv::gpu::GpuMat weights1(size, CV_32FC1, cv::Scalar::all(0.5));
-    cv::gpu::GpuMat weights2(size, CV_32FC1, cv::Scalar::all(0.5));
+    cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
+    cv::gpu::pyrUp(src, dst);
+
     TEST_CYCLE()
     {
-        cv::gpu::blendLinear(img1, img2, weights1, weights2, dst);
+        cv::gpu::pyrUp(src, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, BlendLinear, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
 
 //////////////////////////////////////////////////////////////////////
-// AlphaComp
+// CvtColor
 
-GPU_PERF_TEST(AlphaComp, cv::gpu::DeviceInfo, cv::Size, perf::MatType, AlphaOp)
+GPU_PERF_TEST(CvtColor, cv::gpu::DeviceInfo, cv::Size, MatDepth, CvtColorInfo)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int alpha_op = GET_PARAM(3);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat img1_host(size, type);
-    cv::Mat img2_host(size, type);
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    CvtColorInfo info = GET_PARAM(3);
 
-    declare.in(img1_host, img2_host, WARMUP_RNG);
+    cv::Mat src_host(size, CV_MAKETYPE(depth, info.scn));
+    fill(src_host, 0, 255);
 
-    cv::gpu::GpuMat img1(img1_host);
-    cv::gpu::GpuMat img2(img2_host);
+    cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
-    TEST_CYCLE()
-    {
-        cv::gpu::alphaComp(img1, img2, dst, alpha_op);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, AlphaComp, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC4, CV_16UC4, CV_32SC4, CV_32FC4),
-                        testing::Values((int)cv::gpu::ALPHA_OVER, (int)cv::gpu::ALPHA_IN, (int)cv::gpu::ALPHA_OUT, (int)cv::gpu::ALPHA_ATOP, (int)cv::gpu::ALPHA_XOR, (int)cv::gpu::ALPHA_PLUS, (int)cv::gpu::ALPHA_OVER_PREMUL, (int)cv::gpu::ALPHA_IN_PREMUL, (int)cv::gpu::ALPHA_OUT_PREMUL, (int)cv::gpu::ALPHA_ATOP_PREMUL, (int)cv::gpu::ALPHA_XOR_PREMUL, (int)cv::gpu::ALPHA_PLUS_PREMUL, (int)cv::gpu::ALPHA_PREMUL)));
-
-//////////////////////////////////////////////////////////////////////
-// Canny
-
-GPU_PERF_TEST_1(Canny, cv::gpu::DeviceInfo)
-{
-    cv::gpu::DeviceInfo devInfo = GetParam();
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat image_host = readImage("perf/1280x1024.jpg", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(image_host.empty());
-
-    cv::gpu::GpuMat image(image_host);
-    cv::gpu::GpuMat dst;
-    cv::gpu::CannyBuf buf;
+    cv::gpu::cvtColor(src, dst, info.code, info.dcn);
 
     TEST_CYCLE()
     {
-        cv::gpu::Canny(image, buf, dst, 50.0, 100.0);
+        cv::gpu::cvtColor(src, dst, info.code, info.dcn);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Canny, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)),
+    testing::Values(CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA),
+                    CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY),
+                    CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2XYZ),
+                    CvtColorInfo(3, 3, cv::COLOR_XYZ2BGR),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2YCrCb),
+                    CvtColorInfo(3, 3, cv::COLOR_YCrCb2BGR),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2YUV),
+                    CvtColorInfo(3, 3, cv::COLOR_YUV2BGR),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2HSV),
+                    CvtColorInfo(3, 3, cv::COLOR_HSV2BGR),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2HLS),
+                    CvtColorInfo(3, 3, cv::COLOR_HLS2BGR))));
 
 //////////////////////////////////////////////////////////////////////
-// CalcHist
+// SwapChannels
 
-GPU_PERF_TEST(CalcHist, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(SwapChannels, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, CV_8UC1);
+    cv::Size size = GET_PARAM(1);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat src_host(size, CV_8UC4);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
-    cv::gpu::GpuMat hist;
-    cv::gpu::GpuMat buf;
+
+    const int dstOrder[] = {2, 1, 0, 3};
+
+    cv::gpu::swapChannels(src, dstOrder);
 
     TEST_CYCLE()
     {
-        cv::gpu::calcHist(src, hist, buf);
+        cv::gpu::swapChannels(src, dstOrder);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CalcHist, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, SwapChannels, testing::Combine(ALL_DEVICES, GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
-// EqualizeHist
+// AlphaComp
 
-GPU_PERF_TEST(EqualizeHist, cv::gpu::DeviceInfo, cv::Size)
+CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv::gpu::ALPHA_ATOP, cv::gpu::ALPHA_XOR, cv::gpu::ALPHA_PLUS, cv::gpu::ALPHA_OVER_PREMUL, cv::gpu::ALPHA_IN_PREMUL, cv::gpu::ALPHA_OUT_PREMUL, cv::gpu::ALPHA_ATOP_PREMUL, cv::gpu::ALPHA_XOR_PREMUL, cv::gpu::ALPHA_PLUS_PREMUL, cv::gpu::ALPHA_PREMUL)
+
+GPU_PERF_TEST(AlphaComp, cv::gpu::DeviceInfo, cv::Size, MatType, AlphaOp)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, CV_8UC1);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int alpha_op = GET_PARAM(3);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat img1_host(size, type);
+    fill(img1_host, 0, 255);
 
-    cv::gpu::GpuMat src(src_host);
+    cv::Mat img2_host(size, type);
+    fill(img2_host, 0, 255);
+
+    cv::gpu::GpuMat img1(img1_host);
+    cv::gpu::GpuMat img2(img2_host);
     cv::gpu::GpuMat dst;
-    cv::gpu::GpuMat hist;
-    cv::gpu::GpuMat buf;
+
+    cv::gpu::alphaComp(img1, img2, dst, alpha_op);
 
     TEST_CYCLE()
     {
-        cv::gpu::equalizeHist(src, dst, hist, buf);
+        cv::gpu::alphaComp(img1, img2, dst, alpha_op);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, AlphaComp, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC4), MatType(CV_16UC4), MatType(CV_32SC4), MatType(CV_32FC4)),
+    testing::Values(AlphaOp(cv::gpu::ALPHA_OVER),
+                    AlphaOp(cv::gpu::ALPHA_IN),
+                    AlphaOp(cv::gpu::ALPHA_OUT),
+                    AlphaOp(cv::gpu::ALPHA_ATOP),
+                    AlphaOp(cv::gpu::ALPHA_XOR),
+                    AlphaOp(cv::gpu::ALPHA_PLUS),
+                    AlphaOp(cv::gpu::ALPHA_OVER_PREMUL),
+                    AlphaOp(cv::gpu::ALPHA_IN_PREMUL),
+                    AlphaOp(cv::gpu::ALPHA_OUT_PREMUL),
+                    AlphaOp(cv::gpu::ALPHA_ATOP_PREMUL),
+                    AlphaOp(cv::gpu::ALPHA_XOR_PREMUL),
+                    AlphaOp(cv::gpu::ALPHA_PLUS_PREMUL),
+                    AlphaOp(cv::gpu::ALPHA_PREMUL))));
 
 //////////////////////////////////////////////////////////////////////
 // ImagePyramid
 
-GPU_PERF_TEST(ImagePyramid_build, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(ImagePyramid_build, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
 
     cv::gpu::ImagePyramid pyr;
 
+    pyr.build(src, 5);
+
     TEST_CYCLE()
     {
         pyr.build(src, 5);
@@ -988,27 +1243,30 @@ GPU_PERF_TEST(ImagePyramid_build, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, ImagePyramid_build, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
 
-GPU_PERF_TEST(ImagePyramid_getLayer, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(ImagePyramid_getLayer, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::Mat src_host(size, type);
-
-    declare.in(src_host, WARMUP_RNG);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
 
     cv::gpu::ImagePyramid pyr(src, 3);
 
+    pyr.getLayer(dst, cv::Size(size.width / 2 + 10, size.height / 2 + 10));
+
     TEST_CYCLE()
     {
         pyr.getLayer(dst, cv::Size(size.width / 2 + 10, size.height / 2 + 10));
@@ -1016,77 +1274,10 @@ GPU_PERF_TEST(ImagePyramid_getLayer, cv::gpu::DeviceInfo, cv::Size, perf::MatTyp
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, ImagePyramid_getLayer, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
-
-
-
-//////////////////////////////////////////////////////////////////////
-// MulAndScaleSpectrums
-
-GPU_PERF_TEST(MulAndScaleSpectrums, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-        
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    int type = CV_32FC2;
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-    declare.in(src1_host, src2_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst(size, type);
-    
-    TEST_CYCLE()
-    {        
-        cv::gpu::mulSpectrums(src1, src2, dst, cv::DFT_ROWS, false);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, MulAndScaleSpectrums, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-
-
-//////////////////////////////////////////////////////////////////////
-// MulAndScaleSpectrumsScale
-
-
-GPU_PERF_TEST(MulAndScaleSpectrumsScale, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-
-    float scale = 1.f / size.area();
-    int type = CV_32FC2;
-    
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    cv::Mat src1_host(size, type);
-    cv::Mat src2_host(size, type);
-    declare.in(src1_host, src2_host, WARMUP_RNG);
-
-    cv::gpu::GpuMat src1(src1_host);
-    cv::gpu::GpuMat src2(src2_host);
-    cv::gpu::GpuMat dst(size, type);
-    
-    TEST_CYCLE()
-    {        
-        cv::gpu::mulAndScaleSpectrums(src1, src2, dst, cv::DFT_ROWS, scale, false);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(ImgProc, MulAndScaleSpectrumsScale, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-
-
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
 
 #endif
diff --git a/modules/gpu/perf/perf_matop.cpp b/modules/gpu/perf/perf_matop.cpp
index f56dbf3e5f..5cc24402c0 100644
--- a/modules/gpu/perf/perf_matop.cpp
+++ b/modules/gpu/perf/perf_matop.cpp
@@ -3,79 +3,21 @@
 #ifdef HAVE_CUDA
 
 //////////////////////////////////////////////////////////////////////
-// Merge
+// SetTo
 
-GPU_PERF_TEST(Merge, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(SetTo, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    const int num_channels = 4;
-
-    std::vector<cv::gpu::GpuMat> src(num_channels);
-    for (int i = 0; i < num_channels; ++i)
-        src[i] = cv::gpu::GpuMat(size, type, cv::Scalar::all(i)); 
-
-    cv::gpu::GpuMat dst;
-
-    TEST_CYCLE()
-    {
-        cv::gpu::merge(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(MatOp, Merge, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// Split
-
-GPU_PERF_TEST(Split, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
-    const int num_channels = 4;
-
-    cv::gpu::GpuMat src(size, CV_MAKETYPE(type, num_channels), cv::Scalar(1, 2, 3, 4));
-
-    std::vector<cv::gpu::GpuMat> dst(num_channels);
-    for (int i = 0; i < num_channels; ++i)
-        dst[i] = cv::gpu::GpuMat(size, type); 
-
-    TEST_CYCLE()
-    {
-        cv::gpu::split(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(MatOp, Split, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// SetTo
-
-GPU_PERF_TEST(SetTo, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::gpu::GpuMat src(size, type);
     cv::Scalar val(1, 2, 3, 4);
 
+    src.setTo(val);
+
     TEST_CYCLE()
     {
         src.setTo(val);
@@ -83,31 +25,36 @@ GPU_PERF_TEST(SetTo, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
 }
 
 INSTANTIATE_TEST_CASE_P(MatOp, SetTo, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
+                    MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
 
 //////////////////////////////////////////////////////////////////////
 // SetToMasked
 
-GPU_PERF_TEST(SetToMasked, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(SetToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::Mat src_host(size, type);
-    cv::Mat mask_host(size, CV_8UC1);
+    fill(src_host, 0, 255);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat mask_host(size, CV_8UC1);
     fill(mask_host, 0, 2);
 
     cv::gpu::GpuMat src(src_host);
     cv::Scalar val(1, 2, 3, 4);
     cv::gpu::GpuMat mask(mask_host);
-    
+
+    src.setTo(val, mask);
+
     TEST_CYCLE()
     {
         src.setTo(val, mask);
@@ -115,31 +62,36 @@ GPU_PERF_TEST(SetToMasked, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
 }
 
 INSTANTIATE_TEST_CASE_P(MatOp, SetToMasked, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
+                    MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
 
 //////////////////////////////////////////////////////////////////////
 // CopyToMasked
 
-GPU_PERF_TEST(CopyToMasked, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(CopyToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-
     cv::Mat src_host(size, type);
-    cv::Mat mask_host(size, CV_8UC1);
+    fill(src_host, 0, 255);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat mask_host(size, CV_8UC1);
     fill(mask_host, 0, 2);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat mask(mask_host);
     cv::gpu::GpuMat dst;
-    
+
+    src.copyTo(dst, mask);
+
     TEST_CYCLE()
     {
         src.copyTo(dst, mask);
@@ -147,39 +99,43 @@ GPU_PERF_TEST(CopyToMasked, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
 }
 
 INSTANTIATE_TEST_CASE_P(MatOp, CopyToMasked, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
+                    MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
 
 //////////////////////////////////////////////////////////////////////
 // ConvertTo
 
-GPU_PERF_TEST(ConvertTo, cv::gpu::DeviceInfo, cv::Size, perf::MatType, perf::MatType)
+GPU_PERF_TEST(ConvertTo, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::Size size = GET_PARAM(1);
-    int type1 = GET_PARAM(2);
-    int type2 = GET_PARAM(3);
-
     cv::gpu::setDevice(devInfo.deviceID());
 
-    cv::Mat src_host(size, type1);
+    cv::Size size = GET_PARAM(1);
+    int depth1 = GET_PARAM(2);
+    int depth2 = GET_PARAM(3);
 
-    declare.in(src_host, WARMUP_RNG);
+    cv::Mat src_host(size, depth1);
+    fill(src_host, 0, 255);
 
     cv::gpu::GpuMat src(src_host);
     cv::gpu::GpuMat dst;
-    
+
+    src.convertTo(dst, depth2, 0.5, 1.0);
+
     TEST_CYCLE()
     {
-        src.convertTo(dst, type2, 0.5, 1.0);
+        src.convertTo(dst, depth2, 0.5, 1.0);
     }
 }
 
 INSTANTIATE_TEST_CASE_P(MatOp, ConvertTo, testing::Combine(
-                        ALL_DEVICES, 
-                        GPU_TYPICAL_MAT_SIZES, 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1), 
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F)),
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F))));
 
 #endif
diff --git a/modules/gpu/perf/perf_objdetect.cpp b/modules/gpu/perf/perf_objdetect.cpp
index aa1efd9c81..0e0c934412 100644
--- a/modules/gpu/perf/perf_objdetect.cpp
+++ b/modules/gpu/perf/perf_objdetect.cpp
@@ -2,13 +2,16 @@
 
 #ifdef HAVE_CUDA
 
+///////////////////////////////////////////////////////////////
+// HOG
+
 GPU_PERF_TEST_1(HOG, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
     cv::Mat img_host = readImage("gpu/hog/road.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_host.empty());
 
     cv::gpu::GpuMat img(img_host);
     std::vector<cv::Rect> found_locations;
@@ -16,6 +19,8 @@ GPU_PERF_TEST_1(HOG, cv::gpu::DeviceInfo)
     cv::gpu::HOGDescriptor hog;
     hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
 
+    hog.detectMultiScale(img, found_locations);
+
     TEST_CYCLE()
     {
         hog.detectMultiScale(img, found_locations);
@@ -24,6 +29,8 @@ GPU_PERF_TEST_1(HOG, cv::gpu::DeviceInfo)
 
 INSTANTIATE_TEST_CASE_P(ObjDetect, HOG, ALL_DEVICES);
 
+///////////////////////////////////////////////////////////////
+// HaarClassifier
 
 GPU_PERF_TEST_1(HaarClassifier, cv::gpu::DeviceInfo)
 {
@@ -31,14 +38,16 @@ GPU_PERF_TEST_1(HaarClassifier, cv::gpu::DeviceInfo)
     cv::gpu::setDevice(devInfo.deviceID());
 
     cv::Mat img_host = readImage("gpu/haarcascade/group_1_640x480_VGA.pgm", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_host.empty());
 
     cv::gpu::CascadeClassifier_GPU cascade;
 
-    if (!cascade.load("haarcascade_frontalface_alt.xml"))
-        CV_Error(0, "Can't load cascade");
+    ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
 
     cv::gpu::GpuMat img(img_host);
-    cv::gpu::GpuMat objects_buffer(1, 100, cv::DataType<cv::Rect>::type);
+    cv::gpu::GpuMat objects_buffer;
+
+    cascade.detectMultiScale(img, objects_buffer);
 
     TEST_CYCLE()
     {
@@ -48,7 +57,4 @@ GPU_PERF_TEST_1(HaarClassifier, cv::gpu::DeviceInfo)
 
 INSTANTIATE_TEST_CASE_P(ObjDetect, HaarClassifier, ALL_DEVICES);
 
-
-
-
 #endif
diff --git a/modules/gpu/perf/perf_precomp.hpp b/modules/gpu/perf/perf_precomp.hpp
index ef2839be51..21532ab52f 100644
--- a/modules/gpu/perf/perf_precomp.hpp
+++ b/modules/gpu/perf/perf_precomp.hpp
@@ -3,12 +3,17 @@
 
 #include <cstdio>
 #include <iostream>
+
 #include "cvconfig.h"
+
 #include "opencv2/ts/ts.hpp"
 #include "opencv2/ts/ts_perf.hpp"
+
 #include "opencv2/core/core.hpp"
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/gpu/gpu.hpp"
+#include "opencv2/calib3d/calib3d.hpp"
+
 #include "perf_utility.hpp"
 
 #if GTEST_CREATE_SHARED_LIBRARY
diff --git a/modules/gpu/perf/perf_utility.cpp b/modules/gpu/perf/perf_utility.cpp
index c0b2fadade..c54d2ace66 100644
--- a/modules/gpu/perf/perf_utility.cpp
+++ b/modules/gpu/perf/perf_utility.cpp
@@ -7,12 +7,12 @@ using namespace cv::gpu;
 void fill(Mat& m, double a, double b)
 {
     RNG rng(123456789);
-    rng.fill(m, RNG::UNIFORM, a, b);
+    rng.fill(m, RNG::UNIFORM, Scalar::all(a), Scalar::all(b));
 }
 
 void PrintTo(const CvtColorInfo& info, ostream* os)
 {
-    static const char* str[] = 
+    static const char* str[] =
     {
         "BGR2BGRA",
         "BGRA2BGR",
@@ -89,7 +89,7 @@ void PrintTo(const CvtColorInfo& info, ostream* os)
         0,
         0,
         0,
-        
+
         "HLS2BGR",
         "HLS2RGB",
 
@@ -131,7 +131,7 @@ void PrintTo(const CvtColorInfo& info, ostream* os)
         0,
         0,
         0,
-        0 
+        0
     };
 
     *os << str[info.code];
@@ -147,11 +147,6 @@ Mat readImage(const string& fileName, int flags)
     return imread(perf::TestBase::getDataPath(fileName), flags);
 }
 
-bool supportFeature(const DeviceInfo& info, FeatureSet feature)
-{
-    return TargetArchs::builtWith(feature) && info.supports(feature);
-}
-
 const vector<DeviceInfo>& devices()
 {
     static vector<DeviceInfo> devs;
@@ -175,27 +170,3 @@ const vector<DeviceInfo>& devices()
 
     return devs;
 }
-
-vector<DeviceInfo> devices(FeatureSet feature)
-{
-    const vector<DeviceInfo>& d = devices();
-    
-    vector<DeviceInfo> devs_filtered;
-
-    if (TargetArchs::builtWith(feature))
-    {
-        devs_filtered.reserve(d.size());
-
-        for (size_t i = 0, size = d.size(); i < size; ++i)
-        {
-            const DeviceInfo& info = d[i];
-
-            if (info.supports(feature))
-                devs_filtered.push_back(info);
-        }
-    }
-
-    return devs_filtered;
-}
-
-
diff --git a/modules/gpu/perf/perf_utility.hpp b/modules/gpu/perf/perf_utility.hpp
index f6336c8e20..6bfc9c8a71 100644
--- a/modules/gpu/perf/perf_utility.hpp
+++ b/modules/gpu/perf/perf_utility.hpp
@@ -3,17 +3,23 @@
 
 void fill(cv::Mat& m, double a, double b);
 
-enum {HORIZONTAL_AXIS = 0, VERTICAL_AXIS = 1, BOTH_AXIS = -1};
-
 using perf::MatType;
 using perf::MatDepth;
 
 CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
-CV_ENUM(FlipCode, HORIZONTAL_AXIS, VERTICAL_AXIS, BOTH_AXIS)
 CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
-CV_ENUM(MatchMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
-CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2)
-CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv::gpu::ALPHA_ATOP, cv::gpu::ALPHA_XOR, cv::gpu::ALPHA_PLUS, cv::gpu::ALPHA_OVER_PREMUL, cv::gpu::ALPHA_IN_PREMUL, cv::gpu::ALPHA_OUT_PREMUL, cv::gpu::ALPHA_ATOP_PREMUL, cv::gpu::ALPHA_XOR_PREMUL, cv::gpu::ALPHA_PLUS_PREMUL, cv::gpu::ALPHA_PREMUL)
+CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_HAMMING)
+
+struct CvtColorInfo
+{
+    int scn;
+    int dcn;
+    int code;
+
+    explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
+};
+
+void PrintTo(const CvtColorInfo& info, std::ostream* os);
 
 #define IMPLEMENT_PARAM_CLASS(name, type) \
     class name \
@@ -26,19 +32,10 @@ CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv:
     }; \
     inline void PrintTo( name param, std::ostream* os) \
     { \
-        *os << #name <<  "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
+        *os << #name <<  " = " << testing::PrintToString(static_cast< type >(param)); \
     }
 
-struct CvtColorInfo
-{
-    int scn;
-    int dcn;
-    int code;
-
-    explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
-};
-
-void PrintTo(const CvtColorInfo& info, std::ostream* os);
+IMPLEMENT_PARAM_CLASS(Channels, int)
 
 namespace cv { namespace gpu
 {
@@ -71,14 +68,9 @@ namespace cv { namespace gpu
 
 cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
 
-bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
-
 const std::vector<cv::gpu::DeviceInfo>& devices();
 
-std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature);
-
 #define ALL_DEVICES testing::ValuesIn(devices())
-#define DEVICES(feature) testing::ValuesIn(devices(feature))
 
 #define GET_PARAM(k) std::tr1::get< k >(GetParam())
 
diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp
index ff80aabb63..6d779d662c 100644
--- a/modules/gpu/perf/perf_video.cpp
+++ b/modules/gpu/perf/perf_video.cpp
@@ -8,13 +8,12 @@
 GPU_PERF_TEST_1(BroxOpticalFlow, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
     cv::Mat frame0_host = readImage("gpu/opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
-    cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
-
     ASSERT_FALSE(frame0_host.empty());
+
+    cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1_host.empty());
 
     frame0_host.convertTo(frame0_host, CV_32FC1, 1.0 / 255.0);
@@ -22,12 +21,14 @@ GPU_PERF_TEST_1(BroxOpticalFlow, cv::gpu::DeviceInfo)
 
     cv::gpu::GpuMat frame0(frame0_host);
     cv::gpu::GpuMat frame1(frame1_host);
-    cv::gpu::GpuMat u; 
+    cv::gpu::GpuMat u;
     cv::gpu::GpuMat v;
 
-    cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, 
+    cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
                                     10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
 
+    d_flow(frame0, frame1, u, v);
+
     declare.time(10);
 
     TEST_CYCLE()
@@ -44,13 +45,12 @@ INSTANTIATE_TEST_CASE_P(Video, BroxOpticalFlow, ALL_DEVICES);
 GPU_PERF_TEST_1(InterpolateFrames, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
     cv::Mat frame0_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
-    cv::Mat frame1_host = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
-
     ASSERT_FALSE(frame0_host.empty());
+
+    cv::Mat frame1_host = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1_host.empty());
 
     frame0_host.convertTo(frame0_host, CV_32FC1, 1.0 / 255.0);
@@ -58,18 +58,20 @@ GPU_PERF_TEST_1(InterpolateFrames, cv::gpu::DeviceInfo)
 
     cv::gpu::GpuMat frame0(frame0_host);
     cv::gpu::GpuMat frame1(frame1_host);
-    cv::gpu::GpuMat fu, fv; 
+    cv::gpu::GpuMat fu, fv;
     cv::gpu::GpuMat bu, bv;
 
-    cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, 
+    cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
                                     10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
-    
+
     d_flow(frame0, frame1, fu, fv);
     d_flow(frame1, frame0, bu, bv);
 
     cv::gpu::GpuMat newFrame;
     cv::gpu::GpuMat buf;
 
+    cv::gpu::interpolateFrames(frame0, frame1, fu, fv, bu, bv, 0.5f, newFrame, buf);
+
     TEST_CYCLE()
     {
         cv::gpu::interpolateFrames(frame0, frame1, fu, fv, bu, bv, 0.5f, newFrame, buf);
@@ -84,13 +86,12 @@ INSTANTIATE_TEST_CASE_P(Video, InterpolateFrames, ALL_DEVICES);
 GPU_PERF_TEST_1(CreateOpticalFlowNeedleMap, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
     cv::Mat frame0_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
-    cv::Mat frame1_host = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
-
     ASSERT_FALSE(frame0_host.empty());
+
+    cv::Mat frame1_host = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1_host.empty());
 
     frame0_host.convertTo(frame0_host, CV_32FC1, 1.0 / 255.0);
@@ -100,13 +101,15 @@ GPU_PERF_TEST_1(CreateOpticalFlowNeedleMap, cv::gpu::DeviceInfo)
     cv::gpu::GpuMat frame1(frame1_host);
     cv::gpu::GpuMat u, v;
 
-    cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, 
+    cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
                                     10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
-    
+
     d_flow(frame0, frame1, u, v);
 
     cv::gpu::GpuMat vertex, colors;
 
+    cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors);
+
     TEST_CYCLE()
     {
         cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors);
@@ -118,15 +121,16 @@ INSTANTIATE_TEST_CASE_P(Video, CreateOpticalFlowNeedleMap, ALL_DEVICES);
 //////////////////////////////////////////////////////
 // GoodFeaturesToTrack
 
-GPU_PERF_TEST(GoodFeaturesToTrack, cv::gpu::DeviceInfo, double)
+IMPLEMENT_PARAM_CLASS(MinDistance, double)
+
+GPU_PERF_TEST(GoodFeaturesToTrack, cv::gpu::DeviceInfo, MinDistance)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     double minDistance = GET_PARAM(1);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-    
     cv::Mat image_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
-
     ASSERT_FALSE(image_host.empty());
 
     cv::gpu::GoodFeaturesToTrackDetector_GPU detector(8000, 0.01, minDistance);
@@ -134,30 +138,38 @@ GPU_PERF_TEST(GoodFeaturesToTrack, cv::gpu::DeviceInfo, double)
     cv::gpu::GpuMat image(image_host);
     cv::gpu::GpuMat pts;
 
+    detector(image, pts);
+
     TEST_CYCLE()
     {
         detector(image, pts);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, testing::Combine(ALL_DEVICES, testing::Values(0.0, 3.0)));
+INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(MinDistance(0.0), MinDistance(3.0))));
 
 //////////////////////////////////////////////////////
 // PyrLKOpticalFlowSparse
 
-GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, bool, int, int)
+IMPLEMENT_PARAM_CLASS(GraySource, bool)
+IMPLEMENT_PARAM_CLASS(Points, int)
+IMPLEMENT_PARAM_CLASS(WinSize, int)
+
+GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, GraySource, Points, WinSize)
 {
     cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+
     bool useGray = GET_PARAM(1);
     int points = GET_PARAM(2);
     int win_size = GET_PARAM(3);
 
-    cv::gpu::setDevice(devInfo.deviceID());
-    
     cv::Mat frame0_host = readImage("gpu/opticalflow/frame0.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
-    cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
-
     ASSERT_FALSE(frame0_host.empty());
+
+    cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
     ASSERT_FALSE(frame1_host.empty());
 
     cv::Mat gray_frame;
@@ -179,19 +191,19 @@ GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, bool, int, int)
     cv::gpu::GpuMat nextPts;
     cv::gpu::GpuMat status;
 
+    pyrLK.sparse(frame0, frame1, pts, nextPts, status);
+
     TEST_CYCLE()
     {
         pyrLK.sparse(frame0, frame1, pts, nextPts, status);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine
-                        (
-                            ALL_DEVICES, 
-                            testing::Bool(), 
-                            testing::Values(1000, 2000, 4000, 8000), 
-                            testing::Values(17, 21)
-                        ));
+INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(GraySource(true), GraySource(false)),
+    testing::Values(Points(1000), Points(2000), Points(4000), Points(8000)),
+    testing::Values(WinSize(17), WinSize(21))));
 
 //////////////////////////////////////////////////////
 // PyrLKOpticalFlowDense
@@ -199,22 +211,23 @@ INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine
 GPU_PERF_TEST_1(PyrLKOpticalFlowDense, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
     cv::Mat frame0_host = readImage("gpu/opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
-    cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
-
     ASSERT_FALSE(frame0_host.empty());
+
+    cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1_host.empty());
 
     cv::gpu::GpuMat frame0(frame0_host);
     cv::gpu::GpuMat frame1(frame1_host);
-    cv::gpu::GpuMat u; 
+    cv::gpu::GpuMat u;
     cv::gpu::GpuMat v;
 
     cv::gpu::PyrLKOpticalFlow pyrLK;
 
+    pyrLK.dense(frame0, frame1, u, v);
+
     declare.time(10);
 
     TEST_CYCLE()
@@ -225,20 +238,18 @@ GPU_PERF_TEST_1(PyrLKOpticalFlowDense, cv::gpu::DeviceInfo)
 
 INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowDense, ALL_DEVICES);
 
-
 //////////////////////////////////////////////////////
 // FarnebackOpticalFlowTest
 
 GPU_PERF_TEST_1(FarnebackOpticalFlowTest, cv::gpu::DeviceInfo)
 {
     cv::gpu::DeviceInfo devInfo = GetParam();
-
     cv::gpu::setDevice(devInfo.deviceID());
 
     cv::Mat frame0_host = readImage("gpu/opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
-    cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
-
     ASSERT_FALSE(frame0_host.empty());
+
+    cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1_host.empty());
 
     cv::gpu::GpuMat frame0(frame0_host);
@@ -246,13 +257,15 @@ GPU_PERF_TEST_1(FarnebackOpticalFlowTest, cv::gpu::DeviceInfo)
     cv::gpu::GpuMat u;
     cv::gpu::GpuMat v;
 
-    cv::gpu::FarnebackOpticalFlow calc;
+    cv::gpu::FarnebackOpticalFlow farneback;
+
+    farneback(frame0, frame1, u, v);
 
     declare.time(10);
 
     TEST_CYCLE()
     {
-        calc(frame0, frame1, u, v);
+        farneback(frame0, frame1, u, v);
     }
 }
 
diff --git a/modules/gpu/perf_cpu/perf_arithm.cpp b/modules/gpu/perf_cpu/perf_arithm.cpp
deleted file mode 100644
index 6fd8919b48..0000000000
--- a/modules/gpu/perf_cpu/perf_arithm.cpp
+++ /dev/null
@@ -1,1117 +0,0 @@
-#include "perf_cpu_precomp.hpp"
-
-#ifdef HAVE_CUDA
-
-//////////////////////////////////////////////////////////////////////
-// Transpose
-
-GPU_PERF_TEST(Transpose, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::transpose(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Transpose, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_32SC1, CV_64FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// Flip
-
-GPU_PERF_TEST(Flip, cv::gpu::DeviceInfo, cv::Size, perf::MatType, FlipCode)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int flipCode = GET_PARAM(3);
-
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::flip(src, dst, flipCode);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Flip, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
-                        testing::Values((int) HORIZONTAL_AXIS, (int) VERTICAL_AXIS, (int) BOTH_AXIS)));
-
-//////////////////////////////////////////////////////////////////////
-// LUT
-
-GPU_PERF_TEST(LUT, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-    cv::Mat lut(1, 256, CV_8UC1);
-
-    declare.in(src, lut, WARMUP_RNG);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::LUT(src, lut, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, LUT, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3)));
-
-//////////////////////////////////////////////////////////////////////
-// CartToPolar
-
-GPU_PERF_TEST(CartToPolar, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::Size size = GET_PARAM(1);
-
-    cv::Mat x(size, CV_32FC1);
-    cv::Mat y(size, CV_32FC1);
-
-    fill(x, -100.0, 100.0);
-    fill(y, -100.0, 100.0);
-
-    cv::Mat magnitude;
-    cv::Mat angle;
-
-    TEST_CYCLE()
-    {
-        cv::cartToPolar(x, y, magnitude, angle);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// PolarToCart
-
-GPU_PERF_TEST(PolarToCart, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::Size size = GET_PARAM(1);
-
-    cv::Mat magnitude(size, CV_32FC1);
-    cv::Mat angle(size, CV_32FC1);
-
-    fill(magnitude, 0.0, 100.0);
-    fill(angle, 0.0, 360.0);
-
-    cv::Mat x;
-    cv::Mat y;
-
-    TEST_CYCLE()
-    {
-        cv::polarToCart(magnitude, angle, x, y, true);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// AddMat
-
-GPU_PERF_TEST(AddMat, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-
-    fill(src1, 0.0, 100.0);
-    fill(src2, 0.0, 100.0);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::add(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, AddMat, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// AddScalar
-
-GPU_PERF_TEST(AddScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    fill(src, 0.0, 100.0);
-
-    cv::Scalar s(1,2,3,4);
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::add(src, s, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, AddScalar, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// SubtractMat
-
-GPU_PERF_TEST(SubtractMat, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-
-    fill(src1, 0.0, 100.0);
-    fill(src2, 0.0, 100.0);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::subtract(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, SubtractMat, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// SubtractScalar
-
-GPU_PERF_TEST(SubtractScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    fill(src, 0.0, 100.0);
-
-    cv::Scalar s(1,2,3,4);
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::subtract(src, s, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, SubtractScalar, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// MultiplyMat
-
-GPU_PERF_TEST(MultiplyMat, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-
-    fill(src1, 0.0, 100.0);
-    fill(src2, 0.0, 100.0);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::multiply(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, MultiplyMat, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// MultiplyScalar
-
-GPU_PERF_TEST(MultiplyScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    fill(src, 0.0, 100.0);
-
-    cv::Scalar s(1,2,3,4);
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::multiply(src, s, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, MultiplyScalar, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// DivideMat
-
-GPU_PERF_TEST(DivideMat, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-
-    fill(src1, 0.0, 100.0);
-    fill(src2, 0.0, 100.0);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::divide(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, DivideMat, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// DivideScalar
-
-GPU_PERF_TEST(DivideScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    fill(src, 0.0, 100.0);
-
-    cv::Scalar s(1,2,3,4);
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::divide(src, s, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, DivideScalar, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// Exp
-
-GPU_PERF_TEST(Exp, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::Size size = GET_PARAM(1);
-
-    cv::Mat src(size, CV_32FC1);
-
-    fill(src, 0.0, 10.0);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::exp(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Exp, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// Pow
-
-GPU_PERF_TEST(Pow, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::pow(src, 0.5, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Pow, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(perf::MatType(CV_32FC1))));
-
-//////////////////////////////////////////////////////////////////////
-// Log
-
-GPU_PERF_TEST(Log, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::log(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Log, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(perf::MatType(CV_32FC1))));
-
-
-//////////////////////////////////////////////////////////////////////
-// Sqrt
-
-GPU_PERF_TEST(Sqrt, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::sqrt(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(perf::MatType(CV_32FC1))));
-
-
-
-//////////////////////////////////////////////////////////////////////
-// Compare
-
-GPU_PERF_TEST(Compare, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-
-    declare.in(src1, src2, WARMUP_RNG);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::compare(src1, src2, dst, cv::CMP_EQ);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Compare, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// CompareScalar
-
-GPU_PERF_TEST(CompareScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Scalar src2 = cv::Scalar::all(123);
-
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::compare(src1, src2, dst, cv::CMP_EQ);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, CompareScalar, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseNot
-
-GPU_PERF_TEST(BitwiseNot, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::bitwise_not(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseNot, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseAnd
-
-GPU_PERF_TEST(BitwiseAnd, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-
-    declare.in(src1, src2, WARMUP_RNG);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::bitwise_and(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseAnd, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseScalarAnd
-
-GPU_PERF_TEST(BitwiseScalarAnd, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
-
-    cv::Mat dst;
-    cv::Scalar sc = cv::Scalar(123, 123, 123, 123);
-
-    TEST_CYCLE()
-    {
-        cv::bitwise_and(src, sc, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseScalarAnd, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4)));
-
-
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseOr
-
-GPU_PERF_TEST(BitwiseOr, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-
-    declare.in(src1, src2, WARMUP_RNG);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::bitwise_or(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseOr, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseScalarOr
-
-GPU_PERF_TEST(BitwiseScalarOr, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
-
-    cv::Mat dst;
-    cv::Scalar sc = cv::Scalar(123, 123, 123, 123);
-
-    TEST_CYCLE()
-    {
-        cv::bitwise_or(src, sc, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseScalarOr, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4)));
-
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseXor
-
-GPU_PERF_TEST(BitwiseXor, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-
-    declare.in(src1, src2, WARMUP_RNG);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::bitwise_xor(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseXor, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-//////////////////////////////////////////////////////////////////////
-// BitwiseScalarXor
-
-GPU_PERF_TEST(BitwiseScalarXor, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
-
-    cv::Mat dst;
-    cv::Scalar sc = cv::Scalar(123, 123, 123, 123);
-
-    TEST_CYCLE()
-    {
-        cv::bitwise_xor(src, sc, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, BitwiseScalarXor, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4)));
-
-
-
-//////////////////////////////////////////////////////////////////////
-// Min
-
-GPU_PERF_TEST(Min, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-
-    declare.in(src1, src2, WARMUP_RNG);
-
-    cv::Mat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::min(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Min, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-//////////////////////////////////////////////////////////////////////
-// Max
-
-GPU_PERF_TEST(Max, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-
-    declare.in(src1, src2, WARMUP_RNG);
-
-    cv::Mat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::max(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Max, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// Abs
-
-GPU_PERF_TEST(Abs, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        dst = cv::abs(src1);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Abs, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_16S, CV_32F)));
-
-
-//////////////////////////////////////////////////////////////////////
-// AbsDiff
-
-GPU_PERF_TEST(AbsDiff, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-    
-    declare.in(src1, src2, WARMUP_RNG);
-
-    cv::Mat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::absdiff(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, AbsDiff, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// AbsDiffScalar
-
-GPU_PERF_TEST(AbsDiffScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Scalar src2 = cv::Scalar::all(123);
-    
-    declare.in(src1, WARMUP_RNG);
-
-    cv::Mat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::absdiff(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, AbsDiffScalar, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
-
-
-//////////////////////////////////////////////////////////////////////
-// Phase
-
-GPU_PERF_TEST(Phase, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-        
-    declare.in(src1, src2, WARMUP_RNG);
-
-    cv::Mat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::phase(src1, src2, dst, true);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Phase, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values<perf::MatType>(CV_32FC1) ));
-
-
-//////////////////////////////////////////////////////////////////////
-// Magnitude
-
-GPU_PERF_TEST(Magnitude, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-        
-    declare.in(src1, src2, WARMUP_RNG);
-
-    cv::Mat dst(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::magnitude(src1, src2, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values<perf::MatType>(CV_32FC1) ));
-
-
-
-//////////////////////////////////////////////////////////////////////
-// MeanStdDev
-
-GPU_PERF_TEST(MeanStdDev, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::Size size = GET_PARAM(1);
-
-    cv::Mat src(size, CV_8UC1);
-
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar mean;
-    cv::Scalar stddev;
-
-    TEST_CYCLE()
-    {
-        cv::meanStdDev(src, mean, stddev);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
-//////////////////////////////////////////////////////////////////////
-// Norm
-
-GPU_PERF_TEST(Norm, cv::gpu::DeviceInfo, cv::Size, perf::MatType, NormType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int normType = GET_PARAM(3);
-
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
-
-    double dst;
-
-    TEST_CYCLE()
-    {
-        dst = cv::norm(src, normType);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Norm, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1),
-                        testing::Values((int) cv::NORM_INF, (int) cv::NORM_L1, (int) cv::NORM_L2)));
-
-//////////////////////////////////////////////////////////////////////
-// NormDiff
-
-GPU_PERF_TEST(NormDiff, cv::gpu::DeviceInfo, cv::Size, NormType)
-{
-    cv::Size size = GET_PARAM(1);
-    int normType = GET_PARAM(2);
-
-    cv::Mat src1(size, CV_8UC1);
-    cv::Mat src2(size, CV_8UC1);
-
-    declare.in(src1, src2, WARMUP_RNG);
-
-    double dst;
-
-    TEST_CYCLE()
-    {
-        dst = cv::norm(src1, src2, normType);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, NormDiff, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values((int) cv::NORM_INF, (int) cv::NORM_L1, (int) cv::NORM_L2)));
-
-//////////////////////////////////////////////////////////////////////
-// Sum
-
-GPU_PERF_TEST(Sum, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
-
-    cv::Scalar dst;
-
-    TEST_CYCLE()
-    {
-        dst = cv::sum(src);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Sum, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// MinMaxLoc
-
-GPU_PERF_TEST(MinMaxLoc, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
-
-    double minVal, maxVal;
-    cv::Point minLoc, maxLoc;
-
-    TEST_CYCLE()
-    {
-        cv::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// CountNonZero
-
-GPU_PERF_TEST(CountNonZero, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src(size, type);
-
-    fill(src, 0.0, 1.0);
-
-    int dst;
-
-    TEST_CYCLE()
-    {
-        dst = cv::countNonZero(src);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// AddWeighted
-
-GPU_PERF_TEST(AddWeighted, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-
-    fill(src1, 0.0, 100.0);
-    fill(src2, 0.0, 100.0);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::addWeighted(src1, 0.5, src2, 0.5, 0.0, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// Reduce
-
-GPU_PERF_TEST(Reduce, cv::gpu::DeviceInfo, cv::Size, perf::MatType, FlipCode)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int dim = GET_PARAM(3);
-
-    cv::Mat src(size, type);
-
-    fill(src, 0.0, 10.0);
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::reduce(src, dst, dim, CV_REDUCE_MIN);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, Reduce, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1),
-                        testing::Values((int) HORIZONTAL_AXIS, (int) VERTICAL_AXIS)));
-
-//////////////////////////////////////////////////////////////////////
-// GEMM
-
-GPU_PERF_TEST(GEMM, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::Size size = GET_PARAM(1);
-
-    cv::Mat src1(size, CV_32FC1);
-    cv::Mat src2(size, CV_32FC1);
-    cv::Mat src3(size, CV_32FC1);
-
-    fill(src1, 0.0, 10.0);
-    fill(src2, 0.0, 10.0);
-    fill(src3, 0.0, 10.0);
-
-    cv::Mat dst;
-
-    declare.time(15.0);
-
-    TEST_CYCLE()
-    {
-        cv::gemm(src1, src2, 1.0, src3, 1.0, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(Arithm, GEMM, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(cv::Size(512, 512), cv::Size(1024, 1024))));
-
-#endif
diff --git a/modules/gpu/perf_cpu/perf_calib3d.cpp b/modules/gpu/perf_cpu/perf_calib3d.cpp
index c52a8d4608..8124b808af 100644
--- a/modules/gpu/perf_cpu/perf_calib3d.cpp
+++ b/modules/gpu/perf_cpu/perf_calib3d.cpp
@@ -2,71 +2,135 @@
 
 #ifdef HAVE_CUDA
 
+//////////////////////////////////////////////////////////////////////
+// StereoBM: times cv::StereoBM bm(0, 256) on the aloe.jpg / aloeR.jpg grayscale pair
+
+GPU_PERF_TEST_1(StereoBM, cv::gpu::DeviceInfo)
+{
+    cv::Mat img_l = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_l.empty());
+
+    cv::Mat img_r = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_r.empty());
+
+    cv::StereoBM bm(0, 256);
+
+    cv::Mat dst;
+
+    bm(img_l, img_r, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    declare.time(5.0); // raised time budget for this test (units presumably seconds - confirm)
+
+    TEST_CYCLE()
+    {
+        bm(img_l, img_r, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoBM, ALL_DEVICES); // the test body never reads the device parameter
+
 //////////////////////////////////////////////////////////////////////
 // ProjectPoints
 
-GPU_PERF_TEST_1(ProjectPoints, cv::gpu::DeviceInfo)
+IMPLEMENT_PARAM_CLASS(Count, int) // Count is also used as a parameter type by SolvePnPRansac in this file
+
+GPU_PERF_TEST(ProjectPoints, cv::gpu::DeviceInfo, Count)
 {
-    cv::Mat src(1, 10000, CV_32FC3);
+    int count = GET_PARAM(1); // number of 3-channel float points in the 1 x count input row
 
-    declare.in(src, WARMUP_RNG);
+    cv::Mat src(1, count, CV_32FC3);
+    fill(src, -100, 100);
 
+    cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1); // rvec/tvec/camera_mat are built before TEST_CYCLE, so the loop does not construct them
+    cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
+    cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
     cv::Mat dst;
 
+    cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst); // one call outside TEST_CYCLE; presumably a warm-up
+
     TEST_CYCLE()
     {
-        cv::projectPoints(src, cv::Mat::ones(1, 3, CV_32FC1), cv::Mat::ones(1, 3, CV_32FC1), cv::Mat::ones(3, 3, CV_32FC1), cv::Mat(), dst);
+        cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    testing::Values<Count>(5000, 10000, 20000)));
 
 //////////////////////////////////////////////////////////////////////
 // SolvePnPRansac
 
-GPU_PERF_TEST_1(SolvePnPRansac, cv::gpu::DeviceInfo)
+GPU_PERF_TEST(SolvePnPRansac, cv::gpu::DeviceInfo, Count)
 {
-    cv::Mat object(1, 10000, CV_32FC3);
-    cv::Mat image(1, 10000, CV_32FC2);
+    int count = GET_PARAM(1); // number of object points in the 1 x count row
+
+    cv::Mat object(1, count, CV_32FC3);
+    fill(object, -100, 100);
 
-    declare.in(object, image, WARMUP_RNG);
+    cv::Mat camera_mat(3, 3, CV_32FC1);
+    fill(camera_mat, 0.5, 1);
+    camera_mat.at<float>(0, 1) = 0.f; // zero (0,1), (1,0), (2,0), (2,1); (0,0), (1,1), (0,2), (1,2), (2,2) keep their filled values
+    camera_mat.at<float>(1, 0) = 0.f;
+    camera_mat.at<float>(2, 0) = 0.f;
+    camera_mat.at<float>(2, 1) = 0.f;
 
-    cv::Mat rvec, tvec;
+    cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0)); // eight coefficients, all zero
 
-    declare.time(3.0);
+    std::vector<cv::Point2f> image_vec; // receives the projections of `object` computed below
+    cv::Mat rvec_gold(1, 3, CV_32FC1);
+    fill(rvec_gold, 0, 1);
+    cv::Mat tvec_gold(1, 3, CV_32FC1);
+    fill(tvec_gold, 0, 1);
+    cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec); // image points come from projecting object with rvec_gold / tvec_gold
+
+    cv::Mat image(1, count, CV_32FC2, &image_vec[0]); // NOTE(review): built from a pointer into image_vec - assumes it holds count points and outlives image; confirm
+
+    cv::Mat rvec;
+    cv::Mat tvec;
+
+    cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec); // one call outside TEST_CYCLE; presumably a warm-up
+
+    declare.time(10.0); // raised time budget for this test (units presumably seconds - confirm)
 
     TEST_CYCLE()
     {
-        cv::solvePnPRansac(object, image, cv::Mat::ones(3, 3, CV_32FC1), cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), rvec, tvec);
+        cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    testing::Values<Count>(5000, 10000, 20000)));
 
 //////////////////////////////////////////////////////////////////////
-// StereoBM
+// ReprojectImageTo3D: times cv::reprojectImageTo3D on a cv::Mat(size, depth) input with a filled 4x4 CV_32FC1 Q
 
-GPU_PERF_TEST_1(StereoBM, cv::gpu::DeviceInfo)
+GPU_PERF_TEST(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, MatDepth)
 {
-    cv::Mat img_l = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
-    cv::Mat img_r = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
 
-    ASSERT_FALSE(img_l.empty());
-    ASSERT_FALSE(img_r.empty());
+    cv::Mat src(size, depth);
+    fill(src, 5.0, 30.0);
 
-    cv::Mat dst;
+    cv::Mat Q(4, 4, CV_32FC1);
+    fill(Q, 0.1, 1.0);
 
-    cv::StereoBM bm(0, 256);
+    cv::Mat dst;
 
-    declare.time(5.0);
+    cv::reprojectImageTo3D(src, dst, Q); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
 
     TEST_CYCLE()
     {
-        bm(img_l, img_r, dst);
+        cv::reprojectImageTo3D(src, dst, Q);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Calib3D, StereoBM, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(Calib3D, ReprojectImageTo3D, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16S)));
 
 #endif
 
diff --git a/modules/gpu/perf_cpu/perf_core.cpp b/modules/gpu/perf_cpu/perf_core.cpp
new file mode 100644
index 0000000000..fb87009584
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_core.cpp
@@ -0,0 +1,1388 @@
+#include "perf_cpu_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+//////////////////////////////////////////////////////////////////////
+// Merge: times cv::merge (cv::, not cv::gpu::) on `channels` planes, plane i filled with the constant i
+
+GPU_PERF_TEST(Merge, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    int channels = GET_PARAM(3);
+
+    std::vector<cv::Mat> src(channels);
+    for (int i = 0; i < channels; ++i)
+        src[i] = cv::Mat(size, depth, cv::Scalar::all(i));
+
+    cv::Mat dst;
+
+    cv::merge(src, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::merge(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Merge, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F),
+    testing::Values<Channels>(2, 3, 4)));
+
+//////////////////////////////////////////////////////////////////////
+// Split: times cv::split (cv::, not cv::gpu::) on a multi-channel matrix initialised to Scalar(1, 2, 3, 4)
+
+GPU_PERF_TEST(Split, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    int channels = GET_PARAM(3);
+
+    cv::Mat src(size, CV_MAKE_TYPE(depth, channels), cv::Scalar(1, 2, 3, 4));
+
+    std::vector<cv::Mat> dst;
+
+    cv::split(src, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst is populated before timing
+
+    TEST_CYCLE()
+    {
+        cv::split(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Split, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F),
+    testing::Values<Channels>(2, 3, 4)));
+
+//////////////////////////////////////////////////////////////////////
+// Add_Mat: times cv::add (cv::, not cv::gpu::) on two matrices of the same size and depth
+
+GPU_PERF_TEST(Add_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1(size, depth);
+    fill(src1, 0.0, 100.0);
+
+    cv::Mat src2(size, depth);
+    fill(src2, 0.0, 100.0);
+
+    cv::Mat dst;
+
+    cv::add(src1, src2, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::add(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Add_Mat, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Add_Scalar: times cv::add (cv::, not cv::gpu::) of a matrix and the scalar (1, 2, 3, 4)
+
+GPU_PERF_TEST(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 0.0, 100.0);
+
+    cv::Scalar s(1, 2, 3, 4);
+    cv::Mat dst;
+
+    cv::add(src, s, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::add(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Add_Scalar, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Subtract_Mat: times cv::subtract (cv::, not cv::gpu::) on two matrices of the same size and depth
+
+GPU_PERF_TEST(Subtract_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1(size, depth);
+    fill(src1, 0.0, 100.0);
+
+    cv::Mat src2(size, depth);
+    fill(src2, 0.0, 100.0);
+
+    cv::Mat dst;
+
+    cv::subtract(src1, src2, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::subtract(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Subtract_Mat, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Subtract_Scalar: times cv::subtract (cv::, not cv::gpu::) of a matrix and the scalar (1, 2, 3, 4)
+
+GPU_PERF_TEST(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 0.0, 100.0);
+
+    cv::Scalar s(1, 2, 3, 4);
+    cv::Mat dst;
+
+    cv::subtract(src, s, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::subtract(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Subtract_Scalar, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Multiply_Mat: times cv::multiply (cv::, not cv::gpu::) on two matrices of the same size and depth
+
+GPU_PERF_TEST(Multiply_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1(size, depth);
+    fill(src1, 0.0, 100.0);
+
+    cv::Mat src2(size, depth);
+    fill(src2, 0.0, 100.0);
+
+    cv::Mat dst;
+
+    cv::multiply(src1, src2, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::multiply(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Multiply_Mat, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Multiply_Scalar: times cv::multiply (cv::, not cv::gpu::) of a matrix and the scalar (1, 2, 3, 4)
+
+GPU_PERF_TEST(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 0.0, 100.0);
+
+    cv::Scalar s(1, 2, 3, 4);
+    cv::Mat dst;
+
+    cv::multiply(src, s, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::multiply(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Multiply_Scalar, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Divide_Mat: times cv::divide (cv::, not cv::gpu::) of src1 by src2, both of the same size and depth
+
+GPU_PERF_TEST(Divide_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1(size, depth);
+    fill(src1, 0.0, 100.0);
+
+    cv::Mat src2(size, depth);
+    fill(src2, 0.0, 100.0); // NOTE(review): divisor range starts at 0.0, so zero divisors look possible depending on fill() - confirm this is intended
+
+    cv::Mat dst;
+
+    cv::divide(src1, src2, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::divide(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Divide_Mat, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Divide_Scalar: times cv::divide (cv::, not cv::gpu::) of a matrix by the scalar (1, 2, 3, 4)
+
+GPU_PERF_TEST(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 0.0, 100.0);
+
+    cv::Scalar s(1, 2, 3, 4);
+    cv::Mat dst;
+
+    cv::divide(src, s, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::divide(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Divide_Scalar, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Divide_Scalar_Inv: times the cv::divide(scale, src, dst) form, where the double comes first and the matrix second
+
+GPU_PERF_TEST(Divide_Scalar_Inv, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 0.0, 100.0); // NOTE(review): src is the second (matrix) argument below and its range starts at 0.0 - confirm zeros are acceptable
+
+    double scale = 100.0;
+    cv::Mat dst;
+
+    cv::divide(scale, src, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::divide(scale, src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Divide_Scalar_Inv, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// AbsDiff_Mat: times cv::absdiff (cv::, not cv::gpu::) on two matrices of the same size and depth
+
+GPU_PERF_TEST(AbsDiff_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1(size, depth);
+    fill(src1, 0.0, 100.0);
+
+    cv::Mat src2(size, depth);
+    fill(src2, 0.0, 100.0);
+
+    cv::Mat dst;
+
+    cv::absdiff(src1, src2, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::absdiff(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, AbsDiff_Mat, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// AbsDiff_Scalar: times cv::absdiff (cv::, not cv::gpu::) of a matrix and the scalar (1, 2, 3, 4)
+
+GPU_PERF_TEST(AbsDiff_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 0.0, 100.0);
+
+    cv::Scalar s(1, 2, 3, 4);
+    cv::Mat dst;
+
+    cv::absdiff(src, s, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::absdiff(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, AbsDiff_Scalar, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Sqrt: times cv::sqrt (cv::, not cv::gpu::) on a matrix filled via fill(src, 0.0, 100.0)
+
+GPU_PERF_TEST(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 0.0, 100.0);
+
+    cv::Mat dst;
+
+    cv::sqrt(src, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::sqrt(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Sqrt, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16S, CV_32F))); // NOTE(review): confirm cv::sqrt accepts the CV_8U / CV_16S inputs swept here - it may require floating-point input
+
+//////////////////////////////////////////////////////////////////////
+// Log: times cv::log (cv::, not cv::gpu::) on a matrix filled via fill(src, 1.0, 100.0)
+
+GPU_PERF_TEST(Log, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 1.0, 100.0); // lower bound is 1.0 here rather than 0.0 - presumably to keep the logarithm's argument positive
+
+    cv::Mat dst;
+
+    cv::log(src, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::log(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Log, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16S, CV_32F))); // NOTE(review): confirm cv::log accepts the CV_8U / CV_16S inputs swept here - it may require floating-point input
+
+//////////////////////////////////////////////////////////////////////
+// Exp: times cv::exp (cv::, not cv::gpu::) on a matrix filled via fill(src, 1.0, 10.0)
+
+GPU_PERF_TEST(Exp, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 1.0, 10.0);
+
+    cv::Mat dst;
+
+    cv::exp(src, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::exp(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Exp, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16S, CV_32F))); // NOTE(review): confirm cv::exp accepts the CV_8U / CV_16S inputs swept here - it may require floating-point input
+
+//////////////////////////////////////////////////////////////////////
+// Pow: times cv::pow (cv::, not cv::gpu::) with the fixed exponent 2.3 on a matrix filled via fill(src, 1.0, 10.0)
+
+GPU_PERF_TEST(Pow, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 1.0, 10.0);
+
+    cv::Mat dst;
+
+    cv::pow(src, 2.3, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::pow(src, 2.3, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Pow, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16S, CV_32F))); // NOTE(review): confirm cv::pow with the non-integer exponent 2.3 accepts CV_8U / CV_16S inputs
+
+//////////////////////////////////////////////////////////////////////
+// Compare_Mat: times cv::compare (cv::, not cv::gpu::) of two matrices for each of the six cv::CMP_* codes
+
+CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
+#define ALL_CMP_CODES testing::Values(CmpCode(cv::CMP_EQ), CmpCode(cv::CMP_NE), CmpCode(cv::CMP_GT), CmpCode(cv::CMP_GE), CmpCode(cv::CMP_LT), CmpCode(cv::CMP_LE))
+
+GPU_PERF_TEST(Compare_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth, CmpCode)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    int cmp_code = GET_PARAM(3);
+
+    cv::Mat src1(size, depth);
+    fill(src1, 0, 100.0);
+
+    cv::Mat src2(size, depth);
+    fill(src2, 0, 100.0);
+
+    cv::Mat dst;
+
+    cv::compare(src1, src2, dst, cmp_code); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::compare(src1, src2, dst, cmp_code);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Compare_Mat, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F),
+    ALL_CMP_CODES));
+
+//////////////////////////////////////////////////////////////////////
+// Compare_Scalar: times cv::compare (cv::, not cv::gpu::) of a matrix against Scalar::all(50) for each code in ALL_CMP_CODES
+
+GPU_PERF_TEST(Compare_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, CmpCode)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    int cmp_code = GET_PARAM(3);
+
+    cv::Mat src(size, depth);
+    fill(src, 0, 100.0);
+
+    cv::Scalar s = cv::Scalar::all(50);
+    cv::Mat dst;
+
+    cv::compare(src, s, dst, cmp_code); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::compare(src, s, dst, cmp_code);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Compare_Scalar, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F),
+    ALL_CMP_CODES));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_Not: times cv::bitwise_not (cv::, not cv::gpu::) on a matrix of the swept integer depths
+
+GPU_PERF_TEST(Bitwise_Not, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 0, 100.0);
+
+    cv::Mat dst;
+
+    cv::bitwise_not(src, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::bitwise_not(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_Not, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S)));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_And_Mat: times cv::bitwise_and (cv::, not cv::gpu::) on two matrices of the same size and depth
+
+GPU_PERF_TEST(Bitwise_And_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1(size, depth);
+    fill(src1, 0, 100.0);
+
+    cv::Mat src2(size, depth);
+    fill(src2, 0, 100.0);
+
+    cv::Mat dst;
+
+    cv::bitwise_and(src1, src2, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::bitwise_and(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_And_Mat, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S)));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_And_Scalar: times cv::bitwise_and (cv::, not cv::gpu::) of a 1/3/4-channel matrix and Scalar(50, 50, 50, 50)
+
+GPU_PERF_TEST(Bitwise_And_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    int channels = GET_PARAM(3);
+
+    int type = CV_MAKE_TYPE(depth, channels); // combine the swept depth and channel count into one matrix type
+
+    cv::Mat src(size, type);
+    fill(src, 0, 100.0);
+
+    cv::Scalar s = cv::Scalar(50, 50, 50, 50);
+    cv::Mat dst;
+
+    cv::bitwise_and(src, s, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::bitwise_and(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_And_Scalar, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S),
+    testing::Values<Channels>(1, 3, 4)));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_Or_Mat: times cv::bitwise_or (cv::, not cv::gpu::) on two matrices of the same size and depth
+
+GPU_PERF_TEST(Bitwise_Or_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1(size, depth);
+    fill(src1, 0, 100.0);
+
+    cv::Mat src2(size, depth);
+    fill(src2, 0, 100.0);
+
+    cv::Mat dst;
+
+    cv::bitwise_or(src1, src2, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::bitwise_or(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_Or_Mat, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S)));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_Or_Scalar: times cv::bitwise_or (cv::, not cv::gpu::) of a 1/3/4-channel matrix and Scalar(50, 50, 50, 50)
+
+GPU_PERF_TEST(Bitwise_Or_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    int channels = GET_PARAM(3);
+
+    int type = CV_MAKE_TYPE(depth, channels); // combine the swept depth and channel count into one matrix type
+
+    cv::Mat src(size, type);
+    fill(src, 0, 100.0);
+
+    cv::Scalar s = cv::Scalar(50, 50, 50, 50);
+    cv::Mat dst;
+
+    cv::bitwise_or(src, s, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::bitwise_or(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_Or_Scalar, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S),
+    testing::Values<Channels>(1, 3, 4)));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_Xor_Mat: times cv::bitwise_xor (cv::, not cv::gpu::) on two matrices of the same size and depth
+
+GPU_PERF_TEST(Bitwise_Xor_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1(size, depth);
+    fill(src1, 0, 100.0);
+
+    cv::Mat src2(size, depth);
+    fill(src2, 0, 100.0);
+
+    cv::Mat dst;
+
+    cv::bitwise_xor(src1, src2, dst); // one call outside TEST_CYCLE; presumably a warm-up so dst exists before timing
+
+    TEST_CYCLE()
+    {
+        cv::bitwise_xor(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_Xor_Mat, testing::Combine(
+    ALL_DEVICES, // device axis of the sweep; the test body never reads GET_PARAM(0)
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S)));
+
+//////////////////////////////////////////////////////////////////////
+// Bitwise_Xor_Scalar: perf case for cv::bitwise_xor(Mat, Scalar) over each (size, depth, channels) combination.
+
+GPU_PERF_TEST(Bitwise_Xor_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int depth = GET_PARAM(2);
+    int channels = GET_PARAM(3);
+
+    int type = CV_MAKE_TYPE(depth, channels); // combine depth and channel count into one matrix type
+
+    cv::Mat src(size, type);
+    fill(src, 0, 100.0);
+
+    cv::Scalar s = cv::Scalar(50, 50, 50, 50); // same constant in all four scalar slots
+    cv::Mat dst;
+
+    cv::bitwise_xor(src, s, dst); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::bitwise_xor(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Bitwise_Xor_Scalar, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S),
+    testing::Values<Channels>(1, 3, 4)));
+
+//////////////////////////////////////////////////////////////////////
+// Min_Mat: perf case for cv::min(Mat, Mat) over each (size, depth) combination.
+
+GPU_PERF_TEST(Min_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1(size, depth);
+    fill(src1, 0, 255.0);
+
+    cv::Mat src2(size, depth);
+    fill(src2, 0, 255.0);
+
+    cv::Mat dst;
+
+    cv::min(src1, src2, dst); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::min(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Min_Mat, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// Min_Scalar: perf case for cv::min(Mat, double) with the constant 50.0 over each (size, depth) combination.
+
+GPU_PERF_TEST(Min_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 0, 255.0);
+
+    double val = 50.0;
+    cv::Mat dst;
+
+    cv::min(src, val, dst); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::min(src, val, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Min_Scalar, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// Max_Mat: perf case for cv::max(Mat, Mat) over each (size, depth) combination.
+
+GPU_PERF_TEST(Max_Mat, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int depth = GET_PARAM(2);
+
+    cv::Mat src1(size, depth);
+    fill(src1, 0, 255.0);
+
+    cv::Mat src2(size, depth);
+    fill(src2, 0, 255.0);
+
+    cv::Mat dst;
+
+    cv::max(src1, src2, dst); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::max(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Max_Mat, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// Max_Scalar: perf case for cv::max(Mat, double) with the constant 50.0 over each (size, depth) combination.
+
+GPU_PERF_TEST(Max_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 0, 255.0);
+
+    double val = 50.0;
+    cv::Mat dst;
+
+    cv::max(src, val, dst); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::max(src, val, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Max_Scalar, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F)));
+
+//////////////////////////////////////////////////////////////////////
+// AddWeighted: perf case for cv::addWeighted with independently chosen depths for src1, src2 and the output.
+
+GPU_PERF_TEST(AddWeighted, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth, MatDepth)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int depth1 = GET_PARAM(2);
+    int depth2 = GET_PARAM(3);
+    int dst_depth = GET_PARAM(4);
+
+    cv::Mat src1(size, depth1);
+    fill(src1, 0, 100.0);
+
+    cv::Mat src2(size, depth2);
+    fill(src2, 0, 100.0);
+
+    cv::Mat dst;
+
+    cv::addWeighted(src1, 0.5, src2, 0.5, 10.0, dst, dst_depth); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::addWeighted(src1, 0.5, src2, 0.5, 10.0, dst, dst_depth);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, AddWeighted, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F),
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F),
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// GEMM: perf case for cv::gemm on square matrices; ALL_GEMM_FLAGS lists 7 transpose combinations (GEMM_2_T | GEMM_3_T is not among them).
+
+CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T)
+#define ALL_GEMM_FLAGS testing::Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
+
+GPU_PERF_TEST(GEMM, cv::gpu::DeviceInfo, cv::Size, MatType, GemmFlags)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int type = GET_PARAM(2);
+    int flags = GET_PARAM(3);
+
+    cv::Mat src1(size, type);
+    fill(src1, 0.0, 10.0);
+
+    cv::Mat src2(size, type);
+    fill(src2, 0.0, 10.0);
+
+    cv::Mat src3(size, type);
+    fill(src3, 0.0, 10.0);
+
+    cv::Mat dst;
+
+    cv::gemm(src1, src2, 1.0, src3, 1.0, dst, flags); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    declare.time(50.0); // NOTE(review): presumably raises this test's time budget - confirm against the perf framework
+
+    TEST_CYCLE()
+    {
+        cv::gemm(src1, src2, 1.0, src3, 1.0, dst, flags);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, GEMM, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
+    testing::Values<MatType>(CV_32FC1, CV_32FC2, CV_64FC1, CV_64FC2),
+    ALL_GEMM_FLAGS));
+
+//////////////////////////////////////////////////////////////////////
+// Transpose: perf case for cv::transpose over each (size, type) combination.
+
+GPU_PERF_TEST(Transpose, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int type = GET_PARAM(2);
+
+    cv::Mat src(size, type);
+    fill(src, 0.0, 100.0);
+
+    cv::Mat dst;
+
+    cv::transpose(src, dst); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::transpose(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Transpose, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatType>(CV_8UC1, CV_8UC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32SC2, CV_64FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// Flip: perf case for cv::flip over each (size, type, flip code) combination.
+
+enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; // NOTE(review): cv::flip documents 0 = around x-axis, >0 = around y-axis, <0 = both axes; these labels look shifted - confirm
+CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
+#define ALL_FLIP_CODES testing::Values(FlipCode(FLIP_BOTH), FlipCode(FLIP_X), FlipCode(FLIP_Y))
+
+GPU_PERF_TEST(Flip, cv::gpu::DeviceInfo, cv::Size, MatType, FlipCode)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int type = GET_PARAM(2);
+    int flipCode = GET_PARAM(3);
+
+    cv::Mat src(size, type);
+    fill(src, 0.0, 100.0);
+
+    cv::Mat dst;
+
+    cv::flip(src, dst, flipCode); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::flip(src, dst, flipCode);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Flip, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatType>(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+    ALL_FLIP_CODES));
+
+//////////////////////////////////////////////////////////////////////
+// LUT_OneChannel: perf case for cv::LUT using a single-channel 1x256 CV_8UC1 table on CV_8UC1 and CV_8UC3 sources.
+
+GPU_PERF_TEST(LUT_OneChannel, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int type = GET_PARAM(2);
+
+    cv::Mat src(size, type);
+    fill(src, 0.0, 100.0);
+
+    cv::Mat lut(1, 256, CV_8UC1); // one table shared by every source channel
+    fill(lut, 0.0, 100.0);
+
+    cv::Mat dst;
+
+    cv::LUT(src, lut, dst); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::LUT(src, lut, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, LUT_OneChannel, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatType>(CV_8UC1, CV_8UC3)));
+
+//////////////////////////////////////////////////////////////////////
+// LUT_MultiChannel: perf case for cv::LUT using a 1x256 table with as many channels as the source (CV_8UC3 only).
+
+GPU_PERF_TEST(LUT_MultiChannel, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int type = GET_PARAM(2);
+
+    cv::Mat src(size, type);
+    fill(src, 0.0, 100.0);
+
+    cv::Mat lut(1, 256, CV_MAKE_TYPE(CV_8U, src.channels())); // table channel count follows the source
+    fill(lut, 0.0, 100.0);
+
+    cv::Mat dst;
+
+    cv::LUT(src, lut, dst); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::LUT(src, lut, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, LUT_MultiChannel, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatType>(CV_8UC3)));
+
+//////////////////////////////////////////////////////////////////////
+// Magnitude_Complex: perf case for cv::magnitude on the two planes split out of a CV_32FC2 matrix.
+
+GPU_PERF_TEST(Magnitude_Complex, cv::gpu::DeviceInfo, cv::Size)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+
+    cv::Mat src(size, CV_32FC2);
+    fill(src, -100.0, 100.0);
+
+    cv::Mat srcs[2];
+    cv::split(src, srcs); // split happens once, before the TEST_CYCLE() loop
+
+    cv::Mat dst;
+
+    cv::magnitude(srcs[0], srcs[1], dst); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::magnitude(srcs[0], srcs[1], dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Magnitude_Complex, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Magnitude: perf case for cv::magnitude on two separate CV_32FC1 matrices.
+
+GPU_PERF_TEST(Magnitude, cv::gpu::DeviceInfo, cv::Size)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+
+    cv::Mat src1(size, CV_32FC1);
+    fill(src1, -100.0, 100.0);
+
+    cv::Mat src2(size, CV_32FC1);
+    fill(src2, -100.0, 100.0);
+
+    cv::Mat dst;
+
+    cv::magnitude(src1, src2, dst); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::magnitude(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Magnitude, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Phase: perf case for cv::phase on two CV_32FC1 matrices, run with angleInDegrees both false and true.
+
+IMPLEMENT_PARAM_CLASS(AngleInDegrees, bool)
+
+GPU_PERF_TEST(Phase, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    bool angleInDegrees = GET_PARAM(2);
+
+    cv::Mat src1(size, CV_32FC1);
+    fill(src1, -100.0, 100.0);
+
+    cv::Mat src2(size, CV_32FC1);
+    fill(src2, -100.0, 100.0);
+
+    cv::Mat dst;
+
+    cv::phase(src1, src2, dst, angleInDegrees); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::phase(src1, src2, dst, angleInDegrees);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Phase, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<AngleInDegrees>(false, true)));
+
+//////////////////////////////////////////////////////////////////////
+// CartToPolar: perf case for cv::cartToPolar on two CV_32FC1 matrices, run with angleInDegrees both false and true.
+
+GPU_PERF_TEST(CartToPolar, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    bool angleInDegrees = GET_PARAM(2);
+
+    cv::Mat src1(size, CV_32FC1);
+    fill(src1, -100.0, 100.0);
+
+    cv::Mat src2(size, CV_32FC1);
+    fill(src2, -100.0, 100.0);
+
+    cv::Mat magnitude;
+    cv::Mat angle;
+
+    cv::cartToPolar(src1, src2, magnitude, angle, angleInDegrees); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates the outputs
+
+    TEST_CYCLE()
+    {
+        cv::cartToPolar(src1, src2, magnitude, angle, angleInDegrees);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, CartToPolar, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<AngleInDegrees>(false, true)));
+
+//////////////////////////////////////////////////////////////////////
+// PolarToCart: perf case for cv::polarToCart on CV_32FC1 magnitude/angle matrices, run with angleInDegrees both false and true.
+
+GPU_PERF_TEST(PolarToCart, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    bool angleInDegrees = GET_PARAM(2);
+
+    cv::Mat magnitude(size, CV_32FC1);
+    fill(magnitude, 0.0, 100.0);
+
+    cv::Mat angle(size, CV_32FC1);
+    fill(angle, 0.0, angleInDegrees ? 360.0 : 2 * CV_PI); // upper bound is one full turn in the selected unit
+
+    cv::Mat x;
+    cv::Mat y;
+
+    cv::polarToCart(magnitude, angle, x, y, angleInDegrees); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates the outputs
+
+    TEST_CYCLE()
+    {
+        cv::polarToCart(magnitude, angle, x, y, angleInDegrees);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, PolarToCart, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<AngleInDegrees>(false, true)));
+
+//////////////////////////////////////////////////////////////////////
+// MeanStdDev: perf case for cv::meanStdDev on a CV_8UC1 matrix of each size.
+
+GPU_PERF_TEST(MeanStdDev, cv::gpu::DeviceInfo, cv::Size)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+
+    cv::Mat src(size, CV_8UC1);
+    fill(src, 0.0, 255.0);
+
+    cv::Scalar mean;
+    cv::Scalar stddev;
+
+    cv::meanStdDev(src, mean, stddev); // extra call before TEST_CYCLE(); presumably a warm-up
+
+    TEST_CYCLE()
+    {
+        cv::meanStdDev(src, mean, stddev);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, MeanStdDev, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Norm: perf case for cv::norm(src, normType) over each (size, depth, norm type) combination.
+
+GPU_PERF_TEST(Norm, cv::gpu::DeviceInfo, cv::Size, MatDepth, NormType)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int depth = GET_PARAM(2);
+    int normType = GET_PARAM(3);
+
+    cv::Mat src(size, depth);
+    fill(src, 0.0, 255.0);
+
+    double dst;
+    cv::Mat buf; // NOTE(review): buf is never used in this test
+
+    dst = cv::norm(src, normType); // extra call before TEST_CYCLE(); presumably a warm-up
+
+    TEST_CYCLE()
+    {
+        dst = cv::norm(src, normType);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Norm, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32S, CV_32F),
+    testing::Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))));
+
+//////////////////////////////////////////////////////////////////////
+// NormDiff: perf case for the two-matrix cv::norm(src1, src2, normType) on CV_8UC1 inputs.
+
+GPU_PERF_TEST(NormDiff, cv::gpu::DeviceInfo, cv::Size, NormType)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int normType = GET_PARAM(2);
+
+    cv::Mat src1(size, CV_8UC1);
+    fill(src1, 0.0, 255.0);
+
+    cv::Mat src2(size, CV_8UC1);
+    fill(src2, 0.0, 255.0);
+
+    double dst; // result is stored but not inspected
+
+    dst = cv::norm(src1, src2, normType); // extra call before TEST_CYCLE(); presumably a warm-up
+
+    TEST_CYCLE()
+    {
+        dst = cv::norm(src1, src2, normType);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, NormDiff, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2))));
+
+//////////////////////////////////////////////////////////////////////
+// Sum: perf case for cv::sum over each (size, type) combination.
+
+GPU_PERF_TEST(Sum, cv::gpu::DeviceInfo, cv::Size, MatType)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int type = GET_PARAM(2);
+
+    cv::Mat src(size, type);
+    fill(src, 0.0, 255.0);
+
+    cv::Scalar dst; // result is stored but not inspected
+
+    dst = cv::sum(src); // extra call before TEST_CYCLE(); presumably a warm-up
+
+    TEST_CYCLE()
+    {
+        dst = cv::sum(src);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Sum, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatType>(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+
+//////////////////////////////////////////////////////////////////////
+// MinMaxLoc: perf case for cv::minMaxLoc requesting both extreme values and both locations.
+
+GPU_PERF_TEST(MinMaxLoc, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 0.0, 255.0);
+
+    double minVal, maxVal;
+    cv::Point minLoc, maxLoc;
+
+    cv::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc); // extra call before TEST_CYCLE(); presumably a warm-up
+
+    TEST_CYCLE()
+    {
+        cv::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, MinMaxLoc, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// CountNonZero: perf case for cv::countNonZero over each (size, depth) combination.
+
+GPU_PERF_TEST(CountNonZero, cv::gpu::DeviceInfo, cv::Size, MatDepth)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int depth = GET_PARAM(2);
+
+    cv::Mat src(size, depth);
+    fill(src, 0.0, 1.5); // NOTE(review): narrow range presumably chosen to mix zero and non-zero elements - confirm against fill()
+
+    int dst; // result is stored but not inspected
+
+    dst = cv::countNonZero(src); // extra call before TEST_CYCLE(); presumably a warm-up
+
+    TEST_CYCLE()
+    {
+        dst = cv::countNonZero(src);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, CountNonZero, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_32F, CV_64F)));
+
+//////////////////////////////////////////////////////////////////////
+// Reduce: perf case for cv::reduce over each (size, depth, channels, reduce op, dimension) combination.
+
+CV_ENUM(ReduceCode, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
+#define ALL_REDUCE_CODES testing::Values<ReduceCode>(CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
+
+enum {Rows = 0, Cols = 1}; // values are passed straight through as the dim argument of cv::reduce
+CV_ENUM(ReduceDim, Rows, Cols)
+
+GPU_PERF_TEST(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, ReduceCode, ReduceDim)
+{
+    cv::Size size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int depth = GET_PARAM(2);
+    int channels = GET_PARAM(3);
+    int reduceOp = GET_PARAM(4);
+    int dim = GET_PARAM(5);
+
+    int type = CV_MAKE_TYPE(depth, channels); // combine depth and channel count into one matrix type
+
+    cv::Mat src(size, type);
+    fill(src, 0.0, 10.0);
+
+    cv::Mat dst;
+
+    cv::reduce(src, dst, dim, reduceOp); // extra call before TEST_CYCLE(); presumably a warm-up that also allocates dst
+
+    TEST_CYCLE()
+    {
+        cv::reduce(src, dst, dim, reduceOp);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Core, Reduce, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values<MatDepth>(CV_8U, CV_16U, CV_16S, CV_32F),
+    testing::Values<Channels>(1, 2, 3, 4),
+    ALL_REDUCE_CODES,
+    testing::Values(ReduceDim(Rows), ReduceDim(Cols))));
+
+#endif
diff --git a/modules/gpu/perf_cpu/perf_features2d.cpp b/modules/gpu/perf_cpu/perf_features2d.cpp
index 20f544ada4..13c6034f48 100644
--- a/modules/gpu/perf_cpu/perf_features2d.cpp
+++ b/modules/gpu/perf_cpu/perf_features2d.cpp
@@ -3,152 +3,182 @@
 #ifdef HAVE_CUDA
 
 //////////////////////////////////////////////////////////////////////
-// BruteForceMatcher_match
+// SURF: perf case running a default-constructed cv::SURF on gpu/perf/aloe.jpg loaded as grayscale.
 
-GPU_PERF_TEST(BruteForceMatcher_match, cv::gpu::DeviceInfo, int)
+GPU_PERF_TEST_1(SURF, cv::gpu::DeviceInfo)
 {
-    int desc_size = GET_PARAM(1);
+    cv::Mat img = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img.empty()); // fail the test when the image did not load
 
-    cv::Mat query(3000, desc_size, CV_32FC1);
-    cv::Mat train(3000, desc_size, CV_32FC1);
+    cv::SURF surf;
 
-    declare.in(query, train, WARMUP_RNG);
+    std::vector<cv::KeyPoint> keypoints;
+    cv::Mat descriptors;
 
-    cv::BFMatcher matcher(cv::NORM_L2);
-    std::vector<cv::DMatch> matches;
+    surf(img, cv::noArray(), keypoints, descriptors); // extra call before TEST_CYCLE(); presumably a warm-up
 
-    declare.time(10.0);
+    declare.time(50.0); // NOTE(review): presumably raises this test's time budget - confirm against the perf framework
 
     TEST_CYCLE()
     {
-        matcher.match(query, train, matches);
+        surf(img, cv::noArray(), keypoints, descriptors);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_match, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(64, 128, 256)));
+INSTANTIATE_TEST_CASE_P(Features2D, SURF, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// BruteForceMatcher_knnMatch
+// FAST: perf case running cv::FAST(img, keypoints, 20) on gpu/perf/aloe.jpg loaded as grayscale.
 
-GPU_PERF_TEST(BruteForceMatcher_knnMatch, cv::gpu::DeviceInfo, int, int)
+GPU_PERF_TEST_1(FAST, cv::gpu::DeviceInfo)
 {
-    int desc_size = GET_PARAM(1);
-    int k = GET_PARAM(2);
-
-    cv::Mat query(3000, desc_size, CV_32FC1);
-    cv::Mat train(3000, desc_size, CV_32FC1);
-
-    declare.in(query, train, WARMUP_RNG);
+    cv::Mat img = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img.empty()); // fail the test when the image did not load
 
-    cv::BFMatcher matcher(cv::NORM_L2);
-    std::vector< std::vector<cv::DMatch> > matches;
+    std::vector<cv::KeyPoint> keypoints;
 
-    declare.time(10.0);
+    cv::FAST(img, keypoints, 20); // extra call before TEST_CYCLE(); presumably a warm-up
 
     TEST_CYCLE()
     {
-        matcher.knnMatch(query, train, matches, k);
+        cv::FAST(img, keypoints, 20);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_knnMatch, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(64, 128, 256),
-                        testing::Values(2, 3)));
+INSTANTIATE_TEST_CASE_P(Features2D, FAST, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// BruteForceMatcher_radiusMatch
+// ORB: perf case running cv::ORB constructed with 4000 on gpu/perf/aloe.jpg loaded as grayscale.
 
-GPU_PERF_TEST(BruteForceMatcher_radiusMatch, cv::gpu::DeviceInfo, int)
+GPU_PERF_TEST_1(ORB, cv::gpu::DeviceInfo)
 {
-    int desc_size = GET_PARAM(1);
-
-    cv::Mat query(3000, desc_size, CV_32FC1);
-    cv::Mat train(3000, desc_size, CV_32FC1);
+    cv::Mat img = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img.empty()); // fail the test when the image did not load
 
-    fill(query, 0, 1);
-    fill(train, 0, 1);
+    cv::ORB orb(4000);
 
-    cv::BFMatcher matcher(cv::NORM_L2);
-    std::vector< std::vector<cv::DMatch> > matches;
+    std::vector<cv::KeyPoint> keypoints;
+    cv::Mat descriptors;
 
-    declare.time(10.0);
+    orb(img, cv::noArray(), keypoints, descriptors); // extra call before TEST_CYCLE(); presumably a warm-up
 
     TEST_CYCLE()
     {
-        matcher.radiusMatch(query, train, matches, 2.0);
+        orb(img, cv::noArray(), keypoints, descriptors);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_radiusMatch, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(64, 128, 256)));
+INSTANTIATE_TEST_CASE_P(Features2D, ORB, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// SURF
+// BruteForceMatcher_match: perf case for cv::BFMatcher::match on 3000 x desc_size query/train matrices per norm type.
 
-GPU_PERF_TEST_1(SURF, cv::gpu::DeviceInfo)
+IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
+
+GPU_PERF_TEST(BruteForceMatcher_match, cv::gpu::DeviceInfo, DescriptorSize, NormType)
 {
-    cv::Mat img = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    int desc_size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int normType = GET_PARAM(2);
 
-    ASSERT_FALSE(img.empty());
+    int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F; // 8-bit descriptors for Hamming, float otherwise
 
-    std::vector<cv::KeyPoint> keypoints;
-    cv::Mat descriptors;
+    cv::Mat query(3000, desc_size, type);
+    fill(query, 0.0, 10.0);
 
-    cv::SURF surf;
+    cv::Mat train(3000, desc_size, type);
+    fill(train, 0.0, 10.0);
 
-    declare.time(30.0);
+    cv::BFMatcher matcher(normType);
+
+    std::vector<cv::DMatch> matches;
+
+    matcher.match(query, train, matches); // extra call before TEST_CYCLE(); presumably a warm-up
+
+    declare.time(20.0); // NOTE(review): presumably raises this test's time budget - confirm against the perf framework
 
     TEST_CYCLE()
     {
-        surf(img, cv::noArray(), keypoints, descriptors);
+        matcher.match(query, train, matches);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, SURF, DEVICES(cv::gpu::GLOBAL_ATOMICS));
+INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_match, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
+    testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
 
 //////////////////////////////////////////////////////////////////////
-// FAST
+// BruteForceMatcher_knnMatch: perf case for cv::BFMatcher::knnMatch with k of 2 or 3 per descriptor size and norm type.
 
-GPU_PERF_TEST_1(FAST, cv::gpu::DeviceInfo)
+IMPLEMENT_PARAM_CLASS(K, int)
+
+GPU_PERF_TEST(BruteForceMatcher_knnMatch, cv::gpu::DeviceInfo, DescriptorSize, K, NormType)
 {
-    cv::Mat img = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    int desc_size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int k = GET_PARAM(2);
+    int normType = GET_PARAM(3);
 
-    ASSERT_FALSE(img.empty());
+    int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F; // 8-bit descriptors for Hamming, float otherwise
 
-    std::vector<cv::KeyPoint> keypoints;
+    cv::Mat query(3000, desc_size, type);
+    fill(query, 0.0, 10.0);
+
+    cv::Mat train(3000, desc_size, type);
+    fill(train, 0.0, 10.0);
+
+    cv::BFMatcher matcher(normType);
+
+    std::vector< std::vector<cv::DMatch> > matches;
+
+    matcher.knnMatch(query, train, matches, k); // extra call before TEST_CYCLE(); presumably a warm-up
+
+    declare.time(30.0); // NOTE(review): presumably raises this test's time budget - confirm against the perf framework
 
     TEST_CYCLE()
     {
-        cv::FAST(img, keypoints, 20);
+        matcher.knnMatch(query, train, matches, k);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, FAST, DEVICES(cv::gpu::GLOBAL_ATOMICS));
+INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_knnMatch, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
+    testing::Values(K(2), K(3)),
+    testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
 
 //////////////////////////////////////////////////////////////////////
-// ORB
+// BruteForceMatcher_radiusMatch: perf case for cv::BFMatcher::radiusMatch with radius 2.0 per descriptor size and norm type.
 
-GPU_PERF_TEST_1(ORB, cv::gpu::DeviceInfo)
+GPU_PERF_TEST(BruteForceMatcher_radiusMatch, cv::gpu::DeviceInfo, DescriptorSize, NormType)
 {
-    cv::Mat img = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    int desc_size = GET_PARAM(1); // param 0 (DeviceInfo) is not read in this test
+    int normType = GET_PARAM(2);
 
-    ASSERT_FALSE(img.empty());
+    int type = normType == cv::NORM_HAMMING ? CV_8U : CV_32F; // 8-bit descriptors for Hamming, float otherwise
 
-    std::vector<cv::KeyPoint> keypoints;
-    cv::Mat descriptors;
+    cv::Mat query(3000, desc_size, type);
+    fill(query, 0.0, 1.0);
 
-    cv::ORB orb(4000);
+    cv::Mat train(3000, desc_size, type);
+    fill(train, 0.0, 1.0);
+
+    cv::BFMatcher matcher(normType);
+
+    std::vector< std::vector<cv::DMatch> > matches;
+
+    matcher.radiusMatch(query, train, matches, 2.0); // extra call before TEST_CYCLE(); presumably a warm-up
+
+    declare.time(30.0); // NOTE(review): presumably raises this test's time budget - confirm against the perf framework
 
     TEST_CYCLE()
     {
-        orb(img, cv::noArray(), keypoints, descriptors);
+        matcher.radiusMatch(query, train, matches, 2.0);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Features2D, ORB, DEVICES(cv::gpu::GLOBAL_ATOMICS));
+INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_radiusMatch, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(DescriptorSize(64), DescriptorSize(128), DescriptorSize(256)),
+    testing::Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))));
 
 #endif
diff --git a/modules/gpu/perf_cpu/perf_filters.cpp b/modules/gpu/perf_cpu/perf_filters.cpp
index d2c19c3fcf..ab0be3bad9 100644
--- a/modules/gpu/perf_cpu/perf_filters.cpp
+++ b/modules/gpu/perf_cpu/perf_filters.cpp
@@ -14,11 +14,12 @@ GPU_PERF_TEST(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     int ksize = GET_PARAM(3);
 
     cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
+    fill(src, 0.0, 255.0);
 
     cv::Mat dst;
 
+    cv::blur(src, dst, cv::Size(ksize, ksize));
+
     declare.time(20.0);
 
     TEST_CYCLE()
@@ -27,7 +28,7 @@ GPU_PERF_TEST(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Blur, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Blur, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
@@ -43,11 +44,12 @@ GPU_PERF_TEST(Sobel, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     int ksize = GET_PARAM(3);
 
     cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
+    fill(src, 0.0, 255.0);
 
     cv::Mat dst;
 
+    cv::Sobel(src, dst, -1, 1, 1, ksize);
+
     declare.time(20.0);
 
     TEST_CYCLE()
@@ -56,7 +58,7 @@ GPU_PERF_TEST(Sobel, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Sobel, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Sobel, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
@@ -71,11 +73,12 @@ GPU_PERF_TEST(Scharr, cv::gpu::DeviceInfo, cv::Size, MatType)
     int type = GET_PARAM(2);
 
     cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
+    fill(src, 0.0, 255.0);
 
     cv::Mat dst;
 
+    cv::Scharr(src, dst, -1, 1, 0);
+
     declare.time(20.0);
 
     TEST_CYCLE()
@@ -84,7 +87,7 @@ GPU_PERF_TEST(Scharr, cv::gpu::DeviceInfo, cv::Size, MatType)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Scharr, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Scharr, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1))));
@@ -99,11 +102,12 @@ GPU_PERF_TEST(GaussianBlur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     int ksize = GET_PARAM(3);
 
     cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
+    fill(src, 0.0, 255.0);
 
     cv::Mat dst;
 
+    cv::GaussianBlur(src, dst, cv::Size(ksize, ksize), 0.5);
+
     declare.time(20.0);
 
     TEST_CYCLE()
@@ -112,7 +116,7 @@ GPU_PERF_TEST(GaussianBlur, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, GaussianBlur, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
@@ -128,11 +132,12 @@ GPU_PERF_TEST(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     int ksize = GET_PARAM(3);
 
     cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
+    fill(src, 0.0, 255.0);
 
     cv::Mat dst;
 
+    cv::Laplacian(src, dst, -1, ksize);
+
     declare.time(20.0);
 
     TEST_CYCLE()
@@ -141,7 +146,7 @@ GPU_PERF_TEST(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Laplacian, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Laplacian, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
@@ -156,13 +161,14 @@ GPU_PERF_TEST(Erode, cv::gpu::DeviceInfo, cv::Size, MatType)
     int type = GET_PARAM(2);
 
     cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
+    fill(src, 0.0, 255.0);
 
     cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
 
     cv::Mat dst;
 
+    cv::erode(src, dst, ker);
+
     declare.time(20.0);
 
     TEST_CYCLE()
@@ -171,7 +177,7 @@ GPU_PERF_TEST(Erode, cv::gpu::DeviceInfo, cv::Size, MatType)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Erode, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Erode, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
@@ -185,13 +191,14 @@ GPU_PERF_TEST(Dilate, cv::gpu::DeviceInfo, cv::Size, MatType)
     int type = GET_PARAM(2);
 
     cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
+    fill(src, 0.0, 255.0);
 
     cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
 
     cv::Mat dst;
 
+    cv::dilate(src, dst, ker);
+
     declare.time(20.0);
 
     TEST_CYCLE()
@@ -200,7 +207,7 @@ GPU_PERF_TEST(Dilate, cv::gpu::DeviceInfo, cv::Size, MatType)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Dilate, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Dilate, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
@@ -218,13 +225,14 @@ GPU_PERF_TEST(MorphologyEx, cv::gpu::DeviceInfo, cv::Size, MatType, MorphOp)
     int morphOp = GET_PARAM(3);
 
     cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
+    fill(src, 0.0, 255.0);
 
     cv::Mat dst;
 
     cv::Mat ker = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
 
+    cv::morphologyEx(src, dst, morphOp, ker);
+
     declare.time(20.0);
 
     TEST_CYCLE()
@@ -233,7 +241,7 @@ GPU_PERF_TEST(MorphologyEx, cv::gpu::DeviceInfo, cv::Size, MatType, MorphOp)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, MorphologyEx, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
@@ -249,12 +257,15 @@ GPU_PERF_TEST(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     int ksize = GET_PARAM(3);
 
     cv::Mat src(size, type);
-    cv::Mat kernel(ksize, ksize, CV_32FC1);
+    fill(src, 0.0, 255.0);
 
-    declare.in(src, kernel, WARMUP_RNG);
+    cv::Mat kernel(ksize, ksize, CV_32FC1);
+    fill(kernel, 0.0, 1.0);
 
     cv::Mat dst;
 
+    cv::filter2D(src, dst, -1, kernel);
+
     declare.time(20.0);
 
     TEST_CYCLE()
@@ -263,7 +274,7 @@ GPU_PERF_TEST(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KernelSize)
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine(
+INSTANTIATE_TEST_CASE_P(Filters, Filter2D, testing::Combine(
     ALL_DEVICES,
     GPU_TYPICAL_MAT_SIZES,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
diff --git a/modules/gpu/perf_cpu/perf_imgproc.cpp b/modules/gpu/perf_cpu/perf_imgproc.cpp
index 4e48b117da..51bdf4cc98 100644
--- a/modules/gpu/perf_cpu/perf_imgproc.cpp
+++ b/modules/gpu/perf_cpu/perf_imgproc.cpp
@@ -5,7 +5,7 @@
 //////////////////////////////////////////////////////////////////////
 // Remap
 
-GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation, BorderMode)
+GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
 {
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
@@ -13,14 +13,19 @@ GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation
     int borderMode = GET_PARAM(4);
 
     cv::Mat src(size, type);
+    fill(src, 0, 255);
+
     cv::Mat xmap(size, CV_32FC1);
-    cv::Mat ymap(size, CV_32FC1);
+    fill(xmap, 0, size.width);
 
-    declare.in(src, xmap, ymap, WARMUP_RNG);
+    cv::Mat ymap(size, CV_32FC1);
+    fill(ymap, 0, size.height);
 
     cv::Mat dst;
 
-    declare.time(10.0);
+    cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
+
+    declare.time(3.0);
 
     TEST_CYCLE()
     {
@@ -29,535 +34,648 @@ GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation
 }
 
 INSTANTIATE_TEST_CASE_P(ImgProc, Remap, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32FC1),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC),
-                        testing::Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_CONSTANT)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
+
 
 //////////////////////////////////////////////////////////////////////
-// MeanShiftFiltering
+// Resize
 
-GPU_PERF_TEST_1(MeanShiftFiltering, cv::gpu::DeviceInfo)
+IMPLEMENT_PARAM_CLASS(Scale, double)
+
+GPU_PERF_TEST(Resize, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, Scale)
 {
-    cv::Mat img = readImage("gpu/meanshift/cones.png");
-    ASSERT_FALSE(img.empty());
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int interpolation = GET_PARAM(3);
+    double f = GET_PARAM(4);
+
+    cv::Mat src(size, type);
+    fill(src, 0, 255);
 
     cv::Mat dst;
 
-    declare.time(100.0);
+    cv::resize(src, dst, cv::Size(), f, f, interpolation);
+
+    declare.time(1.0);
 
     TEST_CYCLE()
     {
-        cv::pyrMeanShiftFiltering(img, dst, 50, 50);
+        cv::resize(src, dst, cv::Size(), f, f, interpolation);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftFiltering, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+    testing::Values(Scale(0.5), Scale(2.0))));
 
 //////////////////////////////////////////////////////////////////////
-// ReprojectImageTo3D
+// WarpAffine
 
-GPU_PERF_TEST(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(WarpAffine, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
 {
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
+    int interpolation = GET_PARAM(3);
+    int borderMode = GET_PARAM(4);
 
     cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
+    fill(src, 0, 255);
 
     cv::Mat dst;
 
+    const double alpha = CV_PI / 4;  // rotation angle in radians
+    double mat[2][3] = { {std::cos(alpha), -std::sin(alpha), src.cols / 2.0},
+                         {std::sin(alpha),  std::cos(alpha), 0.0}};
+    cv::Mat M(2, 3, CV_64F, (void*) mat);  // 2x3 transform built over the local array
+
+    cv::warpAffine(src, dst, M, size, interpolation, borderMode);  // one call ahead of the TEST_CYCLE loop
+
     TEST_CYCLE()
     {
-        cv::reprojectImageTo3D(src, dst, cv::Mat::ones(4, 4, CV_32FC1));
+        cv::warpAffine(src, dst, M, size, interpolation, borderMode);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, ReprojectImageTo3D, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16SC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, WarpAffine, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
 
 //////////////////////////////////////////////////////////////////////
-// CvtColor
+// WarpPerspective
 
-GPU_PERF_TEST(CvtColor, cv::gpu::DeviceInfo, cv::Size, perf::MatType, CvtColorInfo)
+GPU_PERF_TEST(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderMode)
 {
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
-    CvtColorInfo info = GET_PARAM(3);
-
-    cv::Mat src(size, CV_MAKETYPE(type, info.scn));
+    int interpolation = GET_PARAM(3);
+    int borderMode = GET_PARAM(4);
 
-    declare.in(src, WARMUP_RNG);
+    cv::Mat src(size, type);
+    fill(src, 0, 255);
 
     cv::Mat dst;
 
+    const double alpha = CV_PI / 4;  // rotation angle in radians
+    double mat[3][3] = { {std::cos(alpha), -std::sin(alpha), src.cols / 2.0},
+                         {std::sin(alpha),  std::cos(alpha), 0.0},
+                         {0.0,              0.0,             1.0}};
+    cv::Mat M(3, 3, CV_64F, (void*) mat);  // 3x3 transform built over the local array
+
+    cv::warpPerspective(src, dst, M, size, interpolation, borderMode);  // one call ahead of the TEST_CYCLE loop
+
     TEST_CYCLE()
     {
-        cv::cvtColor(src, dst, info.code, info.dcn);
+        cv::warpPerspective(src, dst, M, size, interpolation, borderMode);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1),
-                        testing::Values(
-                            CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA), CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY), CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
-                            CvtColorInfo(3, 3, cv::COLOR_BGR2XYZ), CvtColorInfo(3, 3, cv::COLOR_BGR2YCrCb), CvtColorInfo(3, 3, cv::COLOR_YCrCb2BGR),
-                            CvtColorInfo(3, 3, cv::COLOR_BGR2HSV), CvtColorInfo(3, 3, cv::COLOR_HSV2BGR))));
+INSTANTIATE_TEST_CASE_P(ImgProc, WarpPerspective, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
 
 //////////////////////////////////////////////////////////////////////
-// Threshold
+// CopyMakeBorder
 
-GPU_PERF_TEST(Threshold, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, MatType, BorderMode)
 {
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
+    int borderType = GET_PARAM(3);
 
     cv::Mat src(size, type);
+    fill(src, 0, 255);
 
-    declare.in(src, WARMUP_RNG);
+    cv::Mat dst;
 
-    cv::Mat dst(size, type);
+    cv::copyMakeBorder(src, dst, 5, 5, 5, 5, borderType);
 
     TEST_CYCLE()
     {
-        cv::threshold(src, dst, 100.0, 255.0, cv::THRESH_BINARY);
+        cv::copyMakeBorder(src, dst, 5, 5, 5, 5, borderType);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Threshold, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_CONSTANT), BorderMode(cv::BORDER_REFLECT), BorderMode(cv::BORDER_WRAP))));
 
 //////////////////////////////////////////////////////////////////////
-// Resize
+// Threshold
+
+CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
+#define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))
 
-GPU_PERF_TEST(Resize, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation, double)
+GPU_PERF_TEST(Threshold, cv::gpu::DeviceInfo, cv::Size, MatDepth, ThreshOp)
 {
     cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
-    double f = GET_PARAM(4);
-
-    cv::Mat src(size, type);
+    int depth = GET_PARAM(2);
+    int threshOp = GET_PARAM(3);
 
-    declare.in(src, WARMUP_RNG);
+    cv::Mat src(size, depth);
+    fill(src, 0, 255);
 
     cv::Mat dst;
 
-    declare.time(1.0);
+    cv::threshold(src, dst, 100.0, 255.0, threshOp);
 
     TEST_CYCLE()
     {
-        cv::resize(src, dst, cv::Size(), f, f, interpolation);
+        cv::threshold(src, dst, 100.0, 255.0, threshOp);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(perf::szSXGA, perf::sz1080p),
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32FC1),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC),
-                        testing::Values(0.5, 2.0)));
+INSTANTIATE_TEST_CASE_P(ImgProc, Threshold, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F)),
+    ALL_THRESH_OPS));
 
 //////////////////////////////////////////////////////////////////////
-// WarpAffine
+// Integral
 
-GPU_PERF_TEST(WarpAffine, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation)
+GPU_PERF_TEST(Integral, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
 
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
+    cv::Mat src(size, CV_8UC1);
+    fill(src, 0, 255);
 
     cv::Mat dst;
 
-    const double aplha = CV_PI / 4;
-    double mat[2][3] = { {std::cos(aplha), -std::sin(aplha), src.cols / 2},
-                         {std::sin(aplha),  std::cos(aplha), 0}};
-    cv::Mat M(2, 3, CV_64F, (void*) mat);
+    cv::integral(src, dst);
 
     TEST_CYCLE()
     {
-        cv::warpAffine(src, dst, M, size, interpolation, cv::BORDER_CONSTANT, cv::Scalar());
+        cv::integral(src, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, WarpAffine, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
+INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
-// WarpPerspective
+// HistEven_OneChannel
 
-GPU_PERF_TEST(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation)
+GPU_PERF_TEST(HistEven_OneChannel, cv::gpu::DeviceInfo, cv::Size, MatDepth)
 {
     cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int interpolation = GET_PARAM(3);
+    int depth = GET_PARAM(2);
 
-    cv::Mat src(size, type);
+    cv::Mat src(size, depth);
+    fill(src, 0, 255);
 
-    declare.in(src, WARMUP_RNG);
+    int hbins = 30;
+    float hranges[] = {0.0f, 180.0f};
+    cv::Mat hist;
+    int histSize[] = {hbins};
+    const float* ranges[] = {hranges};
+    int channels[] = {0};
 
-    cv::Mat dst;
-
-    const double aplha = CV_PI / 4;
-    double mat[3][3] = { {std::cos(aplha), -std::sin(aplha), src.cols / 2},
-                         {std::sin(aplha),  std::cos(aplha), 0},
-                         {0.0,              0.0,             1.0}};
-    cv::Mat M(3, 3, CV_64F, (void*) mat);
+    cv::calcHist(&src, 1, channels, cv::Mat(), hist, 1, histSize, ranges);
 
     TEST_CYCLE()
     {
-        cv::warpPerspective(src, dst, M, size, interpolation, cv::BORDER_CONSTANT, cv::Scalar());
+        cv::calcHist(&src, 1, channels, cv::Mat(), hist, 1, histSize, ranges);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, WarpPerspective, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
-                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
+INSTANTIATE_TEST_CASE_P(ImgProc, HistEven_OneChannel, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S))));
 
 //////////////////////////////////////////////////////////////////////
-// CopyMakeBorder
+// EqualizeHist
 
-GPU_PERF_TEST(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, perf::MatType, BorderMode)
+GPU_PERF_TEST(EqualizeHist, cv::gpu::DeviceInfo, cv::Size)
 {
     cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-    int borderType = GET_PARAM(3);
 
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
+    cv::Mat src(size, CV_8UC1);
+    fill(src, 0, 255);
 
     cv::Mat dst;
 
+    cv::equalizeHist(src, dst);
+
     TEST_CYCLE()
     {
-        cv::copyMakeBorder(src, dst, 5, 5, 5, 5, borderType);
+        cv::equalizeHist(src, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_32FC1),
-                        testing::Values((int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT, (int) cv::BORDER_WRAP, (int) cv::BORDER_CONSTANT)));
+INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
 
 //////////////////////////////////////////////////////////////////////
-// Integral
+// Canny
 
-GPU_PERF_TEST(Integral, cv::gpu::DeviceInfo, cv::Size)
-{
-    cv::Size size = GET_PARAM(1);
+IMPLEMENT_PARAM_CLASS(AppertureSize, int)
+IMPLEMENT_PARAM_CLASS(L2gradient, bool)
 
-    cv::Mat src(size, CV_8UC1);
+GPU_PERF_TEST(Canny, cv::gpu::DeviceInfo, AppertureSize, L2gradient)
+{
+    int apperture_size = GET_PARAM(1);
+    bool useL2gradient = GET_PARAM(2);
 
-    declare.in(src, WARMUP_RNG);
+    cv::Mat image = readImage("perf/1280x1024.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
 
     cv::Mat dst;
 
+    cv::Canny(image, dst, 50.0, 100.0, apperture_size, useL2gradient);
+
     TEST_CYCLE()
     {
-        cv::integral(src, dst);
+        cv::Canny(image, dst, 50.0, 100.0, apperture_size, useL2gradient);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, Canny, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(AppertureSize(3), AppertureSize(5)),
+    testing::Values(L2gradient(false), L2gradient(true))));
 
 //////////////////////////////////////////////////////////////////////
-// CornerHarris
+// MeanShiftFiltering
 
-GPU_PERF_TEST(CornerHarris, cv::gpu::DeviceInfo, perf::MatType)
+GPU_PERF_TEST_1(MeanShiftFiltering, cv::gpu::DeviceInfo)
 {
-    int type = GET_PARAM(1);
-
-    cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat img = readImage("gpu/meanshift/cones.png");
     ASSERT_FALSE(img.empty());
 
-    img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
-
     cv::Mat dst;
 
-    int blockSize = 3;
-    int ksize = 7;
-    double k = 0.5;
+    cv::pyrMeanShiftFiltering(img, dst, 50, 50);
+
+    declare.time(15.0);
 
     TEST_CYCLE()
     {
-        cv::cornerHarris(img, dst, blockSize, ksize, k);
+        cv::pyrMeanShiftFiltering(img, dst, 50, 50);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(CV_8UC1, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftFiltering, ALL_DEVICES);
 
 //////////////////////////////////////////////////////////////////////
-// CornerMinEigenVal
+// Convolve
+
+IMPLEMENT_PARAM_CLASS(KSize, int)
+IMPLEMENT_PARAM_CLASS(Ccorr, bool)
 
-GPU_PERF_TEST(CornerMinEigenVal, cv::gpu::DeviceInfo, perf::MatType)
+GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, KSize, Ccorr)
 {
-    int type = GET_PARAM(1);
+    cv::Size size = GET_PARAM(1);
+    int templ_size = GET_PARAM(2);
+    bool ccorr = GET_PARAM(3);
 
-    cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(img.empty());
+    ASSERT_FALSE(ccorr);
 
-    img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
+    cv::Mat image(size, CV_32FC1);
+    image.setTo(1.0);
+
+    cv::Mat templ(templ_size, templ_size, CV_32FC1);
+    templ.setTo(1.0);
 
     cv::Mat dst;
 
-    int blockSize = 3;
-    int ksize = 7;
+    cv::filter2D(image, dst, image.depth(), templ);
+
+    declare.time(10.0);
 
     TEST_CYCLE()
     {
-        cv::cornerMinEigenVal(img, dst, blockSize, ksize);
+        cv::filter2D(image, dst, image.depth(), templ);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigenVal, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Values(CV_8UC1, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(KSize(3), KSize(9), KSize(17), KSize(27), KSize(32), KSize(64)),
+    testing::Values(Ccorr(false), Ccorr(true))));
 
-//////////////////////////////////////////////////////////////////////
-// MulSpectrums
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate_8U
+
+CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
+#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_CCOEFF_NORMED))
+
+IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size)
 
-GPU_PERF_TEST(MulSpectrums, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(MatchTemplate_8U, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
 {
     cv::Size size = GET_PARAM(1);
+    cv::Size templ_size = GET_PARAM(2);
+    int cn = GET_PARAM(3);
+    int method = GET_PARAM(4);
 
-    cv::Mat a(size, CV_32FC2);
-    cv::Mat b(size, CV_32FC2);
+    cv::Mat image(size, CV_MAKE_TYPE(CV_8U, cn));
+    fill(image, 0, 255);
 
-    declare.in(a, b, WARMUP_RNG);
+    cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_8U, cn));
+    fill(templ, 0, 255);
 
     cv::Mat dst;
 
+    cv::matchTemplate(image, templ, dst, method);  // one call ahead of the TEST_CYCLE loop
+
     TEST_CYCLE()
     {
-        cv::mulSpectrums(a, b, dst, 0);
+        cv::matchTemplate(image, templ, dst, method);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MulSpectrums, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_8U, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
+    testing::Values(Channels(1), Channels(3), Channels(4)),
+    ALL_TEMPLATE_METHODS));
 
-//////////////////////////////////////////////////////////////////////
-// Dft
+////////////////////////////////////////////////////////////////////////////////
+// MatchTemplate_32F
 
-GPU_PERF_TEST(Dft, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(MatchTemplate_32F, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
 {
     cv::Size size = GET_PARAM(1);
+    cv::Size templ_size = GET_PARAM(2);
+    int cn = GET_PARAM(3);
+    int method = GET_PARAM(4);
 
-    cv::Mat src(size, CV_32FC2);
+    cv::Mat image(size, CV_MAKE_TYPE(CV_32F, cn));
+    fill(image, 0, 255);
 
-    declare.in(src, WARMUP_RNG);
+    cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_32F, cn));
+    fill(templ, 0, 255);
 
     cv::Mat dst;
 
-    declare.time(2.0);
+    cv::matchTemplate(image, templ, dst, method);  // one call ahead of the TEST_CYCLE loop
 
     TEST_CYCLE()
     {
-        cv::dft(src, dst);
+        cv::matchTemplate(image, templ, dst, method);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Dft, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_32F, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16)), TemplateSize(cv::Size(30, 30))),
+    testing::Values(Channels(1), Channels(3), Channels(4)),
+    testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
 
 //////////////////////////////////////////////////////////////////////
-// Convolve
+// MulSpectrums
 
-GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, int, bool)
+CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
+
+GPU_PERF_TEST(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags)
 {
     cv::Size size = GET_PARAM(1);
-    int templ_size = GET_PARAM(2);
+    int flag = GET_PARAM(2);
 
-    cv::Mat image(size, CV_32FC1);
-    cv::Mat templ(templ_size, templ_size, CV_32FC1);
+    cv::Mat a(size, CV_32FC2);
+    fill(a, 0, 100);
 
-    image.setTo(cv::Scalar(1.0));
-    templ.setTo(cv::Scalar(1.0));
+    cv::Mat b(size, CV_32FC2);
+    fill(b, 0, 100);
 
     cv::Mat dst;
 
-    declare.time(2.0);
+    cv::mulSpectrums(a, b, dst, flag);
 
     TEST_CYCLE()
     {
-        cv::filter2D(image, dst, image.depth(), templ);
+        cv::mulSpectrums(a, b, dst, flag);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(3, 9, 27, 32, 64),
-                        testing::Bool()));
+INSTANTIATE_TEST_CASE_P(ImgProc, MulSpectrums, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
 
 //////////////////////////////////////////////////////////////////////
-// PyrDown
+// Dft
 
-GPU_PERF_TEST(PyrDown, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(Dft, cv::gpu::DeviceInfo, cv::Size, DftFlags)
 {
     cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
+    int flag = GET_PARAM(2);
 
-    cv::Mat src(size, type);
-
-    declare.in(src, WARMUP_RNG);
+    cv::Mat src(size, CV_32FC2);
+    fill(src, 0, 100);
 
     cv::Mat dst;
 
+    cv::dft(src, dst, flag);
+
+    declare.time(10.0);
+
     TEST_CYCLE()
     {
-        cv::pyrDown(src, dst);
+        cv::dft(src, dst, flag);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, PyrDown, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16SC3, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, Dft, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE))));
 
 //////////////////////////////////////////////////////////////////////
-// PyrUp
+// CornerHarris
 
-GPU_PERF_TEST(PyrUp, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+IMPLEMENT_PARAM_CLASS(BlockSize, int)
+IMPLEMENT_PARAM_CLASS(ApertureSize, int)
+
+GPU_PERF_TEST(CornerHarris, cv::gpu::DeviceInfo, MatType, BorderMode, BlockSize, ApertureSize)
 {
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
+    int type = GET_PARAM(1);
+    int borderType = GET_PARAM(2);
+    int blockSize = GET_PARAM(3);
+    int apertureSize = GET_PARAM(4);
 
-    cv::Mat src(size, type);
+    cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img.empty());
 
-    declare.in(src, WARMUP_RNG);
+    img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
 
     cv::Mat dst;
 
+    double k = 0.5;
+
+    cv::cornerHarris(img, dst, blockSize, apertureSize, k, borderType);
+
     TEST_CYCLE()
     {
-        cv::pyrUp(src, dst);
+        cv::cornerHarris(img, dst, blockSize, apertureSize, k, borderType);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC4, CV_16SC3, CV_32FC1)));
+INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
+    testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
+    testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
 
 //////////////////////////////////////////////////////////////////////
-// Canny
+// CornerMinEigenVal
 
-GPU_PERF_TEST_1(Canny, cv::gpu::DeviceInfo)
+GPU_PERF_TEST(CornerMinEigenVal, cv::gpu::DeviceInfo, MatType, BorderMode, BlockSize, ApertureSize)
 {
-    cv::Mat image = readImage("perf/1280x1024.jpg", cv::IMREAD_GRAYSCALE);
-    ASSERT_FALSE(image.empty());
+    int type = GET_PARAM(1);
+    int borderType = GET_PARAM(2);
+    int blockSize = GET_PARAM(3);
+    int apertureSize = GET_PARAM(4);
+
+    cv::Mat img = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img.empty());
+
+    img.convertTo(img, type, type == CV_32F ? 1.0 / 255.0 : 1.0);
 
     cv::Mat dst;
 
+    cv::cornerMinEigenVal(img, dst, blockSize, apertureSize, borderType);
+
     TEST_CYCLE()
     {
-        cv::Canny(image, dst, 50.0, 100.0);
+        cv::cornerMinEigenVal(img, dst, blockSize, apertureSize, borderType);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, Canny, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigenVal, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_32FC1)),
+    testing::Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)),
+    testing::Values(BlockSize(3), BlockSize(5), BlockSize(7)),
+    testing::Values(ApertureSize(0), ApertureSize(3), ApertureSize(5), ApertureSize(7))));
 
 //////////////////////////////////////////////////////////////////////
-// CalcHist
+// PyrDown
 
-GPU_PERF_TEST(CalcHist, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(PyrDown, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
 
-    cv::Mat src(size, CV_8UC1);
-
-    declare.in(src, WARMUP_RNG);
+    cv::Mat src(size, type);
+    fill(src, 0, 255);
 
-    cv::Mat hist;
+    cv::Mat dst;
 
-    int histSize = 256;
-    float range[] = { 0, 256 } ;
-    const float* histRange = { range };
+    cv::pyrDown(src, dst);
 
     TEST_CYCLE()
     {
-        cv::calcHist(&src, 1, 0, cv::noArray(), hist, 1, &histSize, &histRange);
+        cv::pyrDown(src, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, CalcHist, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
+INSTANTIATE_TEST_CASE_P(ImgProc, PyrDown, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
 
 //////////////////////////////////////////////////////////////////////
-// EqualizeHist
+// PyrUp
 
-GPU_PERF_TEST(EqualizeHist, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(PyrUp, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
 
-    cv::Mat src(size, CV_8UC1);
-
-    declare.in(src, WARMUP_RNG);
+    cv::Mat src(size, type);
+    fill(src, 0, 255);
 
     cv::Mat dst;
 
+    cv::pyrUp(src, dst);
+
     TEST_CYCLE()
     {
-        cv::equalizeHist(src, dst);
+        cv::pyrUp(src, dst);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
+INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4))));
 
 //////////////////////////////////////////////////////////////////////
-// MulAndScaleSpectrums
-
+// CvtColor
 
-GPU_PERF_TEST(MulAndScaleSpectrums, cv::gpu::DeviceInfo, cv::Size)
+GPU_PERF_TEST(CvtColor, cv::gpu::DeviceInfo, cv::Size, MatDepth, CvtColorInfo)
 {
-    cv::Size size = GET_PARAM(1);       
+    cv::Size size = GET_PARAM(1);
+    int depth = GET_PARAM(2);
+    CvtColorInfo info = GET_PARAM(3);
 
-    int type = CV_32FC2;
+    cv::Mat src(size, CV_MAKETYPE(depth, info.scn));
+    fill(src, 0, 255);
+
+    cv::Mat dst;
+
+    cv::cvtColor(src, dst, info.code, info.dcn);
 
-    cv::Mat src1(size, type);
-    cv::Mat src2(size, type);
-    cv::Mat dst(size, type);
-    declare.in(src1, src2, WARMUP_RNG);   
-    
     TEST_CYCLE()
-    {        
-        cv::mulSpectrums(src1, src2, dst, cv::DFT_ROWS, false);
+    {
+        cv::cvtColor(src, dst, info.code, info.dcn);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(ImgProc, MulAndScaleSpectrums, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES));
-
+INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)),
+    testing::Values(CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA),
+                    CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY),
+                    CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2XYZ),
+                    CvtColorInfo(3, 3, cv::COLOR_XYZ2BGR),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2YCrCb),
+                    CvtColorInfo(3, 3, cv::COLOR_YCrCb2BGR),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2YUV),
+                    CvtColorInfo(3, 3, cv::COLOR_YUV2BGR),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2HSV),
+                    CvtColorInfo(3, 3, cv::COLOR_HSV2BGR),
+                    CvtColorInfo(3, 3, cv::COLOR_BGR2HLS),
+                    CvtColorInfo(3, 3, cv::COLOR_HLS2BGR))));
 
 #endif
diff --git a/modules/gpu/perf_cpu/perf_matop.cpp b/modules/gpu/perf_cpu/perf_matop.cpp
index 0cf225e093..7c46eee80e 100644
--- a/modules/gpu/perf_cpu/perf_matop.cpp
+++ b/modules/gpu/perf_cpu/perf_matop.cpp
@@ -2,64 +2,10 @@
 
 #ifdef HAVE_CUDA
 
-//////////////////////////////////////////////////////////////////////
-// Merge
-
-GPU_PERF_TEST(Merge, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    const int num_channels = 4;
-
-    std::vector<cv::Mat> src(num_channels);
-    for (int i = 0; i < num_channels; ++i)
-        src[i] = cv::Mat(size, type, cv::Scalar::all(i));
-
-    cv::Mat dst;
-
-    TEST_CYCLE()
-    {
-        cv::merge(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(MatOp, Merge, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
-//////////////////////////////////////////////////////////////////////
-// Split
-
-GPU_PERF_TEST(Split, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
-{
-    cv::Size size = GET_PARAM(1);
-    int type = GET_PARAM(2);
-
-    const int num_channels = 4;
-
-    cv::Mat src(size, CV_MAKETYPE(type, num_channels), cv::Scalar(1, 2, 3, 4));
-
-    std::vector<cv::Mat> dst(num_channels);
-    for (int i = 0; i < num_channels; ++i)
-        dst[i] = cv::Mat(size, type);
-
-    TEST_CYCLE()
-    {
-        cv::split(src, dst);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(MatOp, Split, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
-
 //////////////////////////////////////////////////////////////////////
 // SetTo
 
-GPU_PERF_TEST(SetTo, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(SetTo, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
@@ -67,6 +13,8 @@ GPU_PERF_TEST(SetTo, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
     cv::Mat src(size, type);
     cv::Scalar val(1, 2, 3, 4);
 
+    src.setTo(val);
+
     TEST_CYCLE()
     {
         src.setTo(val);
@@ -74,26 +22,31 @@ GPU_PERF_TEST(SetTo, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
 }
 
 INSTANTIATE_TEST_CASE_P(MatOp, SetTo, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
+                    MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
 
 //////////////////////////////////////////////////////////////////////
 // SetToMasked
 
-GPU_PERF_TEST(SetToMasked, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(SetToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
     cv::Mat src(size, type);
-    cv::Mat mask(size, CV_8UC1);
+    fill(src, 0, 255);
 
-    declare.in(src, WARMUP_RNG);
+    cv::Mat mask(size, CV_8UC1);
     fill(mask, 0, 2);
 
     cv::Scalar val(1, 2, 3, 4);
 
+    src.setTo(val, mask);
+
     TEST_CYCLE()
     {
         src.setTo(val, mask);
@@ -101,26 +54,31 @@ GPU_PERF_TEST(SetToMasked, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
 }
 
 INSTANTIATE_TEST_CASE_P(MatOp, SetToMasked, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
+                    MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
 
 //////////////////////////////////////////////////////////////////////
 // CopyToMasked
 
-GPU_PERF_TEST(CopyToMasked, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+GPU_PERF_TEST(CopyToMasked, cv::gpu::DeviceInfo, cv::Size, MatType)
 {
     cv::Size size = GET_PARAM(1);
     int type = GET_PARAM(2);
 
     cv::Mat src(size, type);
-    cv::Mat mask(size, CV_8UC1);
+    fill(src, 0, 255);
 
-    declare.in(src, WARMUP_RNG);
+    cv::Mat mask(size, CV_8UC1);
     fill(mask, 0, 2);
 
     cv::Mat dst;
 
+    src.copyTo(dst, mask);
+
     TEST_CYCLE()
     {
         src.copyTo(dst, mask);
@@ -128,35 +86,39 @@ GPU_PERF_TEST(CopyToMasked, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
 }
 
 INSTANTIATE_TEST_CASE_P(MatOp, CopyToMasked, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4),
+                    MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4),
+                    MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4),
+                    MatType(CV_64FC1), MatType(CV_64FC3), MatType(CV_64FC4))));
 
 //////////////////////////////////////////////////////////////////////
 // ConvertTo
 
-GPU_PERF_TEST(ConvertTo, cv::gpu::DeviceInfo, cv::Size, perf::MatType, perf::MatType)
+GPU_PERF_TEST(ConvertTo, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth)
 {
     cv::Size size = GET_PARAM(1);
-    int type1 = GET_PARAM(2);
-    int type2 = GET_PARAM(3);
+    int depth1 = GET_PARAM(2);
+    int depth2 = GET_PARAM(3);
 
-    cv::Mat src(size, type1);
-
-    declare.in(src, WARMUP_RNG);
+    cv::Mat src(size, depth1);
+    fill(src, 0, 255);
 
     cv::Mat dst;
 
+    src.convertTo(dst, depth2, 0.5, 1.0);
+
     TEST_CYCLE()
     {
-        src.convertTo(dst, type2, 0.5, 1.0);
+        src.convertTo(dst, depth2, 0.5, 1.0);
     }
 }
 
 INSTANTIATE_TEST_CASE_P(MatOp, ConvertTo, testing::Combine(
-                        ALL_DEVICES,
-                        GPU_TYPICAL_MAT_SIZES,
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1),
-                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F)),
+    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F), MatDepth(CV_64F))));
 
 #endif
diff --git a/modules/gpu/perf_cpu/perf_objdetect.cpp b/modules/gpu/perf_cpu/perf_objdetect.cpp
index e2e5d0e06b..05bed40bff 100644
--- a/modules/gpu/perf_cpu/perf_objdetect.cpp
+++ b/modules/gpu/perf_cpu/perf_objdetect.cpp
@@ -2,15 +2,21 @@
 
 #ifdef HAVE_CUDA
 
+///////////////////////////////////////////////////////////////
+// HOG
+
 GPU_PERF_TEST_1(HOG, cv::gpu::DeviceInfo)
 {
     cv::Mat img = readImage("gpu/hog/road.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img.empty());
 
     std::vector<cv::Rect> found_locations;
 
     cv::HOGDescriptor hog;
     hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
 
+    hog.detectMultiScale(img, found_locations);
+
     TEST_CYCLE()
     {
         hog.detectMultiScale(img, found_locations);
@@ -19,27 +25,28 @@ GPU_PERF_TEST_1(HOG, cv::gpu::DeviceInfo)
 
 INSTANTIATE_TEST_CASE_P(ObjDetect, HOG, ALL_DEVICES);
 
+///////////////////////////////////////////////////////////////
+// HaarClassifier
+
 GPU_PERF_TEST_1(HaarClassifier, cv::gpu::DeviceInfo)
-{    
+{
     cv::Mat img = readImage("gpu/haarcascade/group_1_640x480_VGA.pgm", cv::IMREAD_GRAYSCALE);
-        
+    ASSERT_FALSE(img.empty());
+
     cv::CascadeClassifier cascade;
 
-    if (!cascade.load("haarcascade_frontalface_alt.xml"))
-        CV_Error(0, "Can't load cascade");
-        
-    
+    ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath("gpu/perf/haarcascade_frontalface_alt.xml")));
+
     std::vector<cv::Rect> rects;
-    rects.reserve(1000);
+
+    cascade.detectMultiScale(img, rects);
 
     TEST_CYCLE()
     {
-        cascade.detectMultiScale(img, rects);        
+        cascade.detectMultiScale(img, rects);
     }
 }
 
 INSTANTIATE_TEST_CASE_P(ObjDetect, HaarClassifier, ALL_DEVICES);
 
-
-
 #endif
diff --git a/modules/gpu/perf_cpu/perf_utility.cpp b/modules/gpu/perf_cpu/perf_utility.cpp
index e5b22adb53..88d5111f57 100644
--- a/modules/gpu/perf_cpu/perf_utility.cpp
+++ b/modules/gpu/perf_cpu/perf_utility.cpp
@@ -147,11 +147,6 @@ Mat readImage(const string& fileName, int flags)
     return imread(perf::TestBase::getDataPath(fileName), flags);
 }
 
-bool supportFeature(const DeviceInfo& info, FeatureSet feature)
-{
-    return TargetArchs::builtWith(feature) && info.supports(feature);
-}
-
 const vector<DeviceInfo>& devices()
 {
     static vector<DeviceInfo> devs;
@@ -175,27 +170,3 @@ const vector<DeviceInfo>& devices()
 
     return devs;
 }
-
-vector<DeviceInfo> devices(FeatureSet feature)
-{
-    const vector<DeviceInfo>& d = devices();
-
-    vector<DeviceInfo> devs_filtered;
-
-    if (TargetArchs::builtWith(feature))
-    {
-        devs_filtered.reserve(d.size());
-
-        for (size_t i = 0, size = d.size(); i < size; ++i)
-        {
-            const DeviceInfo& info = d[i];
-
-            if (info.supports(feature))
-                devs_filtered.push_back(info);
-        }
-    }
-
-    return devs_filtered;
-}
-
-
diff --git a/modules/gpu/perf_cpu/perf_utility.hpp b/modules/gpu/perf_cpu/perf_utility.hpp
index f6336c8e20..1af0a0a333 100644
--- a/modules/gpu/perf_cpu/perf_utility.hpp
+++ b/modules/gpu/perf_cpu/perf_utility.hpp
@@ -3,17 +3,23 @@
 
 void fill(cv::Mat& m, double a, double b);
 
-enum {HORIZONTAL_AXIS = 0, VERTICAL_AXIS = 1, BOTH_AXIS = -1};
-
 using perf::MatType;
 using perf::MatDepth;
 
 CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
-CV_ENUM(FlipCode, HORIZONTAL_AXIS, VERTICAL_AXIS, BOTH_AXIS)
 CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
-CV_ENUM(MatchMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
 CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2)
-CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv::gpu::ALPHA_ATOP, cv::gpu::ALPHA_XOR, cv::gpu::ALPHA_PLUS, cv::gpu::ALPHA_OVER_PREMUL, cv::gpu::ALPHA_IN_PREMUL, cv::gpu::ALPHA_OUT_PREMUL, cv::gpu::ALPHA_ATOP_PREMUL, cv::gpu::ALPHA_XOR_PREMUL, cv::gpu::ALPHA_PLUS_PREMUL, cv::gpu::ALPHA_PREMUL)
+
+struct CvtColorInfo
+{
+    int scn;
+    int dcn;
+    int code;
+
+    explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
+};
+
+void PrintTo(const CvtColorInfo& info, std::ostream* os);
 
 #define IMPLEMENT_PARAM_CLASS(name, type) \
     class name \
@@ -29,16 +35,7 @@ CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv:
         *os << #name <<  "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \
     }
 
-struct CvtColorInfo
-{
-    int scn;
-    int dcn;
-    int code;
-
-    explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
-};
-
-void PrintTo(const CvtColorInfo& info, std::ostream* os);
+IMPLEMENT_PARAM_CLASS(Channels, int)
 
 namespace cv { namespace gpu
 {
@@ -71,14 +68,9 @@ namespace cv { namespace gpu
 
 cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
 
-bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
-
 const std::vector<cv::gpu::DeviceInfo>& devices();
 
-std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature);
-
 #define ALL_DEVICES testing::ValuesIn(devices())
-#define DEVICES(feature) testing::ValuesIn(devices(feature))
 
 #define GET_PARAM(k) std::tr1::get< k >(GetParam())
 
diff --git a/modules/gpu/perf_cpu/perf_video.cpp b/modules/gpu/perf_cpu/perf_video.cpp
index a2a011a1f8..997758686d 100644
--- a/modules/gpu/perf_cpu/perf_video.cpp
+++ b/modules/gpu/perf_cpu/perf_video.cpp
@@ -5,37 +5,46 @@
 //////////////////////////////////////////////////////
 // GoodFeaturesToTrack
 
-GPU_PERF_TEST(GoodFeaturesToTrack, cv::gpu::DeviceInfo, double)
+IMPLEMENT_PARAM_CLASS(MinDistance, double)
+
+GPU_PERF_TEST(GoodFeaturesToTrack, cv::gpu::DeviceInfo, MinDistance)
 {
     double minDistance = GET_PARAM(1);
 
     cv::Mat image = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
-
     ASSERT_FALSE(image.empty());
 
     cv::Mat corners;
 
+    cv::goodFeaturesToTrack(image, corners, 8000, 0.01, minDistance);
+
     TEST_CYCLE()
     {
         cv::goodFeaturesToTrack(image, corners, 8000, 0.01, minDistance);
     }
 }
 
-INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, testing::Combine(ALL_DEVICES, testing::Values(0.0, 3.0)));
+INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(MinDistance(0.0), MinDistance(3.0))));
 
 //////////////////////////////////////////////////////
 // PyrLKOpticalFlowSparse
 
-GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, bool, int, int)
+IMPLEMENT_PARAM_CLASS(GraySource, bool)
+IMPLEMENT_PARAM_CLASS(Points, int)
+IMPLEMENT_PARAM_CLASS(WinSize, int)
+
+GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, GraySource, Points, WinSize)
 {
     bool useGray = GET_PARAM(1);
     int points = GET_PARAM(2);
     int win_size = GET_PARAM(3);
 
     cv::Mat frame0 = readImage("gpu/opticalflow/frame0.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
-    cv::Mat frame1 = readImage("gpu/opticalflow/frame1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
-
     ASSERT_FALSE(frame0.empty());
+
+    cv::Mat frame1 = readImage("gpu/opticalflow/frame1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
     ASSERT_FALSE(frame1.empty());
 
     cv::Mat gray_frame;
@@ -50,6 +59,8 @@ GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, bool, int, int)
     cv::Mat nextPts;
     cv::Mat status;
 
+    cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(), cv::Size(win_size, win_size));
+
     TEST_CYCLE()
     {
         cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, cv::noArray(), cv::Size(win_size, win_size));
@@ -57,10 +68,10 @@ GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, bool, int, int)
 }
 
 INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine(
-                        ALL_DEVICES,
-                        testing::Bool(),
-                        testing::Values(1000, 2000, 4000, 8000),
-                        testing::Values(17, 21)));
+    ALL_DEVICES,
+    testing::Values(GraySource(true), GraySource(false)),
+    testing::Values(Points(1000), Points(2000), Points(4000), Points(8000)),
+    testing::Values(WinSize(17), WinSize(21))));
 
 //////////////////////////////////////////////////////
 // FarnebackOpticalFlowTest
@@ -68,15 +79,13 @@ INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine(
 GPU_PERF_TEST_1(FarnebackOpticalFlowTest, cv::gpu::DeviceInfo)
 {
     cv::Mat frame0 = readImage("gpu/opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
-    cv::Mat frame1 = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
-
     ASSERT_FALSE(frame0.empty());
+
+    cv::Mat frame1 = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1.empty());
 
     cv::Mat flow;
 
-    declare.time(10);
-
     int numLevels = 5;
     double pyrScale = 0.5;
     int winSize = 13;
@@ -85,9 +94,14 @@ GPU_PERF_TEST_1(FarnebackOpticalFlowTest, cv::gpu::DeviceInfo)
     double polySigma = 1.1;
     int flags = 0;
 
+    // One call outside TEST_CYCLE, as in the other perf tests of this change; the loop below still runs the operation.
+    cv::calcOpticalFlowFarneback(frame0, frame1, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
+
+    declare.time(10);
+
     TEST_CYCLE()
     {
         cv::calcOpticalFlowFarneback(frame0, frame1, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
     }
 }
 
diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp
index e379712279..04039cd251 100644
--- a/modules/gpu/test/test_core.cpp
+++ b/modules/gpu/test/test_core.cpp
@@ -1916,10 +1916,10 @@ TEST_P(LShift, Accuracy)
     cv::Scalar_<int> val = randomScalar(0.0, 8.0);
 
     cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-    cv::gpu::rshift(loadMat(src, useRoi), val, dst);
+    cv::gpu::lshift(loadMat(src, useRoi), val, dst);
 
     cv::Mat dst_gold;
-    rhiftGold(src, val, dst_gold);
+    lhiftGold(src, val, dst_gold);
 
     EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
 }