From d78aa7c9376036837e9e69e5e656d0cfc374f966 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <no@email>
Date: Tue, 22 May 2012 08:29:08 +0000
Subject: [PATCH] added cpu performance test for gpu module

---
 modules/gpu/CMakeLists.txt               |   40 +
 modules/gpu/perf_cpu/perf_arithm.cpp     |  739 ++++++++++++++++
 modules/gpu/perf_cpu/perf_calib3d.cpp    |  202 +++++
 modules/gpu/perf_cpu/perf_features2d.cpp |  187 ++++
 modules/gpu/perf_cpu/perf_filters.cpp    |  144 +++
 modules/gpu/perf_cpu/perf_imgproc.cpp    | 1023 ++++++++++++++++++++++
 modules/gpu/perf_cpu/perf_main.cpp       |   20 +
 modules/gpu/perf_cpu/perf_matop.cpp      |  185 ++++
 modules/gpu/perf_cpu/perf_objdetect.cpp  |   27 +
 modules/gpu/perf_cpu/perf_precomp.cpp    |    1 +
 modules/gpu/perf_cpu/perf_precomp.hpp    |   18 +
 modules/gpu/perf_cpu/perf_utility.cpp    |  201 +++++
 modules/gpu/perf_cpu/perf_utility.hpp    |   69 ++
 modules/gpu/perf_cpu/perf_video.cpp      |  261 ++++++
 14 files changed, 3117 insertions(+)
 create mode 100644 modules/gpu/perf_cpu/perf_arithm.cpp
 create mode 100644 modules/gpu/perf_cpu/perf_calib3d.cpp
 create mode 100644 modules/gpu/perf_cpu/perf_features2d.cpp
 create mode 100644 modules/gpu/perf_cpu/perf_filters.cpp
 create mode 100644 modules/gpu/perf_cpu/perf_imgproc.cpp
 create mode 100644 modules/gpu/perf_cpu/perf_main.cpp
 create mode 100644 modules/gpu/perf_cpu/perf_matop.cpp
 create mode 100644 modules/gpu/perf_cpu/perf_objdetect.cpp
 create mode 100644 modules/gpu/perf_cpu/perf_precomp.cpp
 create mode 100644 modules/gpu/perf_cpu/perf_precomp.hpp
 create mode 100644 modules/gpu/perf_cpu/perf_utility.cpp
 create mode 100644 modules/gpu/perf_cpu/perf_utility.hpp
 create mode 100644 modules/gpu/perf_cpu/perf_video.cpp

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index 58142f31e7..140583d5b7 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -119,3 +119,43 @@ ocv_add_accuracy_tests(FILES "Include" ${test_hdrs}
                        FILES "Src" ${test_srcs}
                        ${nvidia})
 ocv_add_perf_tests()
+
+
+
+set(perf_cpu_path "${CMAKE_CURRENT_SOURCE_DIR}/perf_cpu")
+if(BUILD_PERF_TESTS AND EXISTS "${perf_cpu_path}")
+  # Extra perf executable built from perf_cpu/; opencv_highgui is required for imread/imwrite
+  set(perf_deps ${the_module} opencv_ts opencv_highgui)
+  ocv_check_dependencies(${perf_deps})
+
+  if(OCV_DEPENDENCIES_FOUND)
+    set(the_target "opencv_perf_gpu_cpu")
+
+    ocv_module_include_directories(${perf_deps} "${perf_cpu_path}")
+
+    if(NOT OPENCV_PERF_${the_module}_CPU_SOURCES)
+      file(GLOB perf_srcs "${perf_cpu_path}/*.cpp")
+      file(GLOB perf_hdrs "${perf_cpu_path}/*.hpp" "${perf_cpu_path}/*.h")
+      source_group("Src" FILES ${perf_srcs})
+      source_group("Include" FILES ${perf_hdrs})
+      set(OPENCV_PERF_${the_module}_CPU_SOURCES ${perf_srcs} ${perf_hdrs})
+    endif()
+
+    add_executable(${the_target} ${OPENCV_PERF_${the_module}_CPU_SOURCES})
+    target_link_libraries(${the_target} ${OPENCV_MODULE_${the_module}_DEPS} ${perf_deps} ${OPENCV_LINKER_LIBS})
+
+    # Additional target properties
+    set_target_properties(${the_target} PROPERTIES
+      DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
+      RUNTIME_OUTPUT_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}"
+    )
+
+    if(ENABLE_SOLUTION_FOLDERS)
+      set_target_properties(${the_target} PROPERTIES FOLDER "tests performance")
+    endif()
+
+    ocv_add_precompiled_headers(${the_target})
+  else()
+    # Report the skipped target instead of dropping it silently.
+    message(STATUS "opencv_perf_gpu_cpu is disabled: unsatisfied dependencies among: ${perf_deps}")
+  endif()
+endif()
diff --git a/modules/gpu/perf_cpu/perf_arithm.cpp b/modules/gpu/perf_cpu/perf_arithm.cpp
new file mode 100644
index 0000000000..2ab720cb80
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_arithm.cpp
@@ -0,0 +1,739 @@
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+//////////////////////////////////////////////////////////////////////
+// Transpose
+// Calls cv::gpu::transpose in TEST_CYCLE on a GpuMat built from a host Mat declared with WARMUP_RNG.
+GPU_PERF_TEST(Transpose, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::transpose(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Transpose, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_32SC1, CV_64FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// Flip
+// Calls cv::gpu::flip with each listed flip code (HORIZONTAL_AXIS, VERTICAL_AXIS, BOTH_AXIS).
+GPU_PERF_TEST(Flip, cv::gpu::DeviceInfo, cv::Size, perf::MatType, FlipCode)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int flipCode = GET_PARAM(3);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::flip(src, dst, flipCode);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Flip, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+                        testing::Values((int) HORIZONTAL_AXIS, (int) VERTICAL_AXIS, (int) BOTH_AXIS)));
+
+//////////////////////////////////////////////////////////////////////
+// LUT
+// Calls cv::gpu::LUT with a 1x256 CV_8UC1 table; the table is passed as a host cv::Mat.
+GPU_PERF_TEST(LUT, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+    cv::Mat lut(1, 256, CV_8UC1);
+
+    declare.in(src_host, lut, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::LUT(src, lut, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, LUT, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_8UC3)));
+
+//////////////////////////////////////////////////////////////////////
+// CartToPolar
+// Calls cv::gpu::cartToPolar on CV_32FC1 x/y inputs prepared with fill(.., -100.0, 100.0).
+GPU_PERF_TEST(CartToPolar, cv::gpu::DeviceInfo, cv::Size)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat x_host(size, CV_32FC1);
+    cv::Mat y_host(size, CV_32FC1);
+
+    fill(x_host, -100.0, 100.0);
+    fill(y_host, -100.0, 100.0);
+
+    cv::gpu::GpuMat x(x_host);
+    cv::gpu::GpuMat y(y_host);
+    cv::gpu::GpuMat magnitude;
+    cv::gpu::GpuMat angle;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::cartToPolar(x, y, magnitude, angle);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// PolarToCart
+// Calls cv::gpu::polarToCart; the trailing 'true' presumably selects degrees, matching the 0..360 angle fill (confirm).
+GPU_PERF_TEST(PolarToCart, cv::gpu::DeviceInfo, cv::Size)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat magnitude_host(size, CV_32FC1);
+    cv::Mat angle_host(size, CV_32FC1);
+
+    fill(magnitude_host, 0.0, 100.0);
+    fill(angle_host, 0.0, 360.0);
+
+    cv::gpu::GpuMat magnitude(magnitude_host);
+    cv::gpu::GpuMat angle(angle_host);
+    cv::gpu::GpuMat x;
+    cv::gpu::GpuMat y;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::polarToCart(magnitude, angle, x, y, true);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// AddMat
+// Calls the matrix + matrix form of cv::gpu::add; both inputs are prepared with fill(.., 0.0, 100.0).
+GPU_PERF_TEST(AddMat, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src1_host(size, type);
+    cv::Mat src2_host(size, type);
+
+    fill(src1_host, 0.0, 100.0);
+    fill(src2_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::add(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, AddMat, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// AddScalar
+// Calls cv::gpu::add with a cv::Scalar(1,2,3,4) as the second operand.
+GPU_PERF_TEST(AddScalar, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+
+    fill(src_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::Scalar s(1,2,3,4);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::add(src, s, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, AddScalar, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// Exp
+// Calls cv::gpu::exp on a CV_32FC1 input prepared with fill(.., 0.0, 10.0).
+GPU_PERF_TEST(Exp, cv::gpu::DeviceInfo, cv::Size)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, CV_32FC1);
+
+    fill(src_host, 0.0, 10.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::exp(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Exp, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Pow
+// Calls cv::gpu::pow with a fixed exponent of 0.5 for each listed type.
+GPU_PERF_TEST(Pow, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::pow(src, 0.5, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Pow, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// Compare
+// Calls cv::gpu::compare with cv::CMP_EQ on two inputs of identical size and type.
+GPU_PERF_TEST(Compare, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src1_host(size, type);
+    cv::Mat src2_host(size, type);
+
+    declare.in(src1_host, src2_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::compare(src1, src2, dst, cv::CMP_EQ);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Compare, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// BitwiseNot
+// Calls cv::gpu::bitwise_not for the integer types listed in the instantiation below.
+GPU_PERF_TEST(BitwiseNot, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::bitwise_not(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, BitwiseNot, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
+
+//////////////////////////////////////////////////////////////////////
+// BitwiseAnd
+// Calls the matrix & matrix form of cv::gpu::bitwise_and on two inputs of identical size and type.
+GPU_PERF_TEST(BitwiseAnd, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src1_host(size, type);
+    cv::Mat src2_host(size, type);
+
+    declare.in(src1_host, src2_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::bitwise_and(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, BitwiseAnd, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
+// BitwiseScalarAnd: calls cv::gpu::bitwise_and with a cv::Scalar(123, 123, 123, 123) second operand.
+GPU_PERF_TEST(BitwiseScalarAnd, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+    cv::Scalar sc = cv::Scalar(123, 123, 123, 123);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::bitwise_and(src, sc, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, BitwiseScalarAnd, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32SC1, CV_32SC3, CV_32SC4)));
+
+//////////////////////////////////////////////////////////////////////
+// Min
+// Calls cv::gpu::min; unlike most cases in this file, dst is constructed with (size, type) before the loop.
+GPU_PERF_TEST(Min, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src1_host(size, type);
+    cv::Mat src2_host(size, type);
+
+    declare.in(src1_host, src2_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
+    cv::gpu::GpuMat dst(size, type);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::min(src1, src2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Min, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1)));
+
+//////////////////////////////////////////////////////////////////////
+// MeanStdDev
+// Calls cv::gpu::meanStdDev on a CV_8UC1 input, passing a GpuMat buffer declared outside the loop.
+GPU_PERF_TEST(MeanStdDev, cv::gpu::DeviceInfo, cv::Size)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, CV_8UC1);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host); 
+    cv::Scalar mean;
+    cv::Scalar stddev;
+    cv::gpu::GpuMat buf;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::meanStdDev(src, mean, stddev, buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Norm
+// Calls the single-input cv::gpu::norm with each listed norm type; the returned value is stored but never read.
+GPU_PERF_TEST(Norm, cv::gpu::DeviceInfo, cv::Size, perf::MatType, NormType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int normType = GET_PARAM(3);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    double dst;
+    cv::gpu::GpuMat buf;
+
+    TEST_CYCLE()
+    {
+        dst = cv::gpu::norm(src, normType, buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Norm, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32SC1),
+                        testing::Values((int) cv::NORM_INF, (int) cv::NORM_L1, (int) cv::NORM_L2)));
+
+//////////////////////////////////////////////////////////////////////
+// NormDiff
+// Calls the two-input cv::gpu::norm on CV_8UC1 matrices; the returned value is stored but never read.
+GPU_PERF_TEST(NormDiff, cv::gpu::DeviceInfo, cv::Size, NormType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int normType = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src1_host(size, CV_8UC1);
+    cv::Mat src2_host(size, CV_8UC1);
+
+    declare.in(src1_host, src2_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
+    double dst;
+
+    TEST_CYCLE()
+    {
+        dst = cv::gpu::norm(src1, src2, normType);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, NormDiff, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values((int) cv::NORM_INF, (int) cv::NORM_L1, (int) cv::NORM_L2)));
+
+//////////////////////////////////////////////////////////////////////
+// Sum
+// Calls cv::gpu::sum with a GpuMat buffer declared outside the loop; the result is stored but never read.
+GPU_PERF_TEST(Sum, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::Scalar dst;
+    cv::gpu::GpuMat buf;
+
+    TEST_CYCLE()
+    {
+        dst = cv::gpu::sum(src, buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Sum, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// MinMax
+// Calls cv::gpu::minMax with an empty mask (cv::gpu::GpuMat()) and a buffer declared outside the loop.
+GPU_PERF_TEST(MinMax, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    double minVal, maxVal;
+    cv::gpu::GpuMat buf;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::minMax(src, &minVal, &maxVal, cv::gpu::GpuMat(), buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, MinMax, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// MinMaxLoc
+// Calls cv::gpu::minMaxLoc with an empty mask and separate value/location buffers declared outside the loop.
+GPU_PERF_TEST(MinMaxLoc, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    double minVal, maxVal;
+    cv::Point minLoc, maxLoc;
+    cv::gpu::GpuMat valbuf, locbuf;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::minMaxLoc(src, &minVal, &maxVal, &minLoc, &maxLoc, cv::gpu::GpuMat(), valbuf, locbuf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// CountNonZero
+// Calls cv::gpu::countNonZero on input prepared with fill(.., 0.0, 1.0); the count is stored but never read.
+GPU_PERF_TEST(CountNonZero, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+
+    fill(src_host, 0.0, 1.0);
+
+    cv::gpu::GpuMat src(src_host);
+    int dst;
+    cv::gpu::GpuMat buf;
+
+    TEST_CYCLE()
+    {
+        dst = cv::gpu::countNonZero(src, buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// AddWeighted
+// Calls cv::gpu::addWeighted with weights 0.5 / 0.5 and a 0.0 offset term.
+GPU_PERF_TEST(AddWeighted, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src1_host(size, type);
+    cv::Mat src2_host(size, type);
+
+    fill(src1_host, 0.0, 100.0);
+    fill(src2_host, 0.0, 100.0);
+
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::addWeighted(src1, 0.5, src2, 0.5, 0.0, dst);
+    }
+    // Host copy of the result; it sits outside TEST_CYCLE and is not used afterwards.
+    cv::Mat dst_host(dst);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// Reduce
+// Calls cv::gpu::reduce with CV_REDUCE_MIN; NOTE(review): dim reuses the FlipCode values HORIZONTAL_AXIS / VERTICAL_AXIS.
+GPU_PERF_TEST(Reduce, cv::gpu::DeviceInfo, cv::Size, perf::MatType, FlipCode)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int dim = GET_PARAM(3);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(size, type);
+
+    fill(src_host, 0.0, 10.0);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::reduce(src, dst, dim, CV_REDUCE_MIN);
+    }
+    // Host copy of the result; it sits outside TEST_CYCLE and is not used afterwards.
+    cv::Mat dst_host(dst);
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, Reduce, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1), 
+                        testing::Values((int) HORIZONTAL_AXIS, (int) VERTICAL_AXIS)));
+
+//////////////////////////////////////////////////////////////////////
+// GEMM
+// Calls cv::gpu::gemm with both scale factors 1.0 on square CV_32FC1 matrices; declare.time(5.0) precedes the loop.
+GPU_PERF_TEST(GEMM, cv::gpu::DeviceInfo, cv::Size)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src1_host(size, CV_32FC1);
+    cv::Mat src2_host(size, CV_32FC1);
+    cv::Mat src3_host(size, CV_32FC1);
+
+    fill(src1_host, 0.0, 10.0);
+    fill(src2_host, 0.0, 10.0);
+    fill(src3_host, 0.0, 10.0);
+
+    cv::gpu::GpuMat src1(src1_host);
+    cv::gpu::GpuMat src2(src2_host);
+    cv::gpu::GpuMat src3(src3_host);
+    cv::gpu::GpuMat dst;
+
+    declare.time(5.0);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::gemm(src1, src2, 1.0, src3, 1.0, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Arithm, GEMM, testing::Combine(
+                        ALL_DEVICES, 
+                        testing::Values(cv::Size(512, 512), cv::Size(1024, 1024), cv::Size(2048, 2048))));
+
+#endif
diff --git a/modules/gpu/perf_cpu/perf_calib3d.cpp b/modules/gpu/perf_cpu/perf_calib3d.cpp
new file mode 100644
index 0000000000..0175527b37
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_calib3d.cpp
@@ -0,0 +1,202 @@
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+//////////////////////////////////////////////////////////////////////
+// TransformPoints
+// rvec/tvec are built once before the loop so TEST_CYCLE contains only the cv::gpu::transformPoints call.
+GPU_PERF_TEST_1(TransformPoints, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat src_host(1, 10000, CV_32FC3);
+    declare.in(src_host, WARMUP_RNG);
+
+    const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
+    const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::transformPoints(src, rvec, tvec, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, TransformPoints, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////
+// ProjectPoints
+// rvec/tvec/camera_mat are built once before the loop so TEST_CYCLE does not rebuild them on every pass.
+GPU_PERF_TEST_1(ProjectPoints, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+    cv::gpu::setDevice(devInfo.deviceID());
+    cv::Mat src_host(1, 10000, CV_32FC3);
+    declare.in(src_host, WARMUP_RNG);
+
+    const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
+    const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
+    const cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::projectPoints(src, rvec, tvec, camera_mat, cv::Mat(), dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, ProjectPoints, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////
+// SolvePnPRansac
+// camera_mat and the all-zero 1x8 dist_coef are built once before the loop instead of on every TEST_CYCLE pass.
+GPU_PERF_TEST_1(SolvePnPRansac, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat object(1, 10000, CV_32FC3);
+    cv::Mat image(1, 10000, CV_32FC2);
+    declare.in(object, image, WARMUP_RNG);
+
+    const cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
+    const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
+    cv::Mat rvec, tvec;
+
+    declare.time(3.0);
+
+    TEST_CYCLE()
+    {
+        cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, SolvePnPRansac, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////
+// StereoBM
+// Runs cv::gpu::StereoBM_GPU(0, 256) on gpu/perf/aloe.jpg and aloeR.jpg, both read as grayscale.
+GPU_PERF_TEST_1(StereoBM, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat img_l_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    cv::Mat img_r_host = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
+
+    ASSERT_FALSE(img_l_host.empty());
+    ASSERT_FALSE(img_r_host.empty());
+
+    cv::gpu::GpuMat img_l(img_l_host);
+    cv::gpu::GpuMat img_r(img_r_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::StereoBM_GPU bm(0, 256);
+
+    declare.time(5.0);
+
+    TEST_CYCLE()
+    {
+        bm(img_l, img_r, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoBM, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////
+// StereoBeliefPropagation
+// Runs cv::gpu::StereoBeliefPropagation(64) on gpu/stereobp/aloe-L.png / aloe-R.png (no read flag passed to readImage).
+GPU_PERF_TEST_1(StereoBeliefPropagation, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat img_l_host = readImage("gpu/stereobp/aloe-L.png");
+    cv::Mat img_r_host = readImage("gpu/stereobp/aloe-R.png");
+
+    ASSERT_FALSE(img_l_host.empty());
+    ASSERT_FALSE(img_r_host.empty());
+
+    cv::gpu::GpuMat img_l(img_l_host);
+    cv::gpu::GpuMat img_r(img_r_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::StereoBeliefPropagation bp(64);
+
+    declare.time(10.0);
+
+    TEST_CYCLE()
+    {
+        bp(img_l, img_r, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoBeliefPropagation, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////
+// StereoConstantSpaceBP
+// Runs cv::gpu::StereoConstantSpaceBP(128) on gpu/stereobm/aloe-L.png / aloe-R.png, both read as grayscale.
+GPU_PERF_TEST_1(StereoConstantSpaceBP, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat img_l_host = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat img_r_host = readImage("gpu/stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
+
+    ASSERT_FALSE(img_l_host.empty());
+    ASSERT_FALSE(img_r_host.empty());
+
+    cv::gpu::GpuMat img_l(img_l_host);
+    cv::gpu::GpuMat img_r(img_r_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::StereoConstantSpaceBP bp(128);
+
+    declare.time(10.0);
+
+    TEST_CYCLE()
+    {
+        bp(img_l, img_r, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, StereoConstantSpaceBP, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////
+// DisparityBilateralFilter
+// Runs cv::gpu::DisparityBilateralFilter(128), passing aloe-disp.png first and aloe-L.png second (both grayscale).
+GPU_PERF_TEST_1(DisparityBilateralFilter, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat img_host = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat disp_host = readImage("gpu/stereobm/aloe-disp.png", cv::IMREAD_GRAYSCALE);
+
+    ASSERT_FALSE(img_host.empty());
+    ASSERT_FALSE(disp_host.empty());
+
+    cv::gpu::GpuMat img(img_host);
+    cv::gpu::GpuMat disp(disp_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::DisparityBilateralFilter f(128);
+
+    TEST_CYCLE()
+    {
+        f(disp, img, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Calib3D, DisparityBilateralFilter, ALL_DEVICES);
+
+#endif
+
diff --git a/modules/gpu/perf_cpu/perf_features2d.cpp b/modules/gpu/perf_cpu/perf_features2d.cpp
new file mode 100644
index 0000000000..18e1497871
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_features2d.cpp
@@ -0,0 +1,187 @@
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+//////////////////////////////////////////////////////////////////////
+// BruteForceMatcher_match
+
+GPU_PERF_TEST(BruteForceMatcher_match, cv::gpu::DeviceInfo, int)
+{
+    // Activate the CUDA device under test, then read the descriptor length.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const int descriptorSize = GET_PARAM(1);
+
+    // Two 3000-row float matrices with descriptorSize columns, declared as inputs with WARMUP_RNG.
+    cv::Mat hostQuery(3000, descriptorSize, CV_32FC1);
+    cv::Mat hostTrain(3000, descriptorSize, CV_32FC1);
+    declare.in(hostQuery, hostTrain, WARMUP_RNG);
+
+    cv::gpu::GpuMat query(hostQuery);
+    cv::gpu::GpuMat train(hostTrain);
+    cv::gpu::GpuMat trainIdx, distance;
+
+    cv::gpu::BFMatcher_GPU matcher(cv::NORM_L2);
+
+    declare.time(3.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        matcher.matchSingle(query, train, trainIdx, distance);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_match, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(64, 128, 256)));
+
+//////////////////////////////////////////////////////////////////////
+// BruteForceMatcher_knnMatch
+
+GPU_PERF_TEST(BruteForceMatcher_knnMatch, cv::gpu::DeviceInfo, int, int)
+{
+    // Activate the CUDA device under test, then read descriptor length and neighbour count.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const int descriptorSize = GET_PARAM(1);
+    const int neighbours = GET_PARAM(2);
+
+    // Two 3000-row float matrices with descriptorSize columns, declared as inputs with WARMUP_RNG.
+    cv::Mat hostQuery(3000, descriptorSize, CV_32FC1);
+    cv::Mat hostTrain(3000, descriptorSize, CV_32FC1);
+    declare.in(hostQuery, hostTrain, WARMUP_RNG);
+
+    cv::gpu::GpuMat query(hostQuery);
+    cv::gpu::GpuMat train(hostTrain);
+    cv::gpu::GpuMat trainIdx, distance, allDist;
+
+    cv::gpu::BFMatcher_GPU matcher(cv::NORM_L2);
+
+    declare.time(3.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        matcher.knnMatchSingle(query, train, trainIdx, distance, allDist, neighbours);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_knnMatch, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(64, 128, 256),
+    testing::Values(2, 3)));
+
+//////////////////////////////////////////////////////////////////////
+// BruteForceMatcher_radiusMatch
+
+GPU_PERF_TEST(BruteForceMatcher_radiusMatch, cv::gpu::DeviceInfo, int)
+{
+    // Activate the CUDA device under test, then read the descriptor length.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const int descriptorSize = GET_PARAM(1);
+
+    // Two 3000-row float matrices; fill() (a helper not defined in this file) is called with bounds 0 and 1.
+    cv::Mat hostQuery(3000, descriptorSize, CV_32FC1);
+    cv::Mat hostTrain(3000, descriptorSize, CV_32FC1);
+    fill(hostQuery, 0, 1);
+    fill(hostTrain, 0, 1);
+
+    cv::gpu::GpuMat query(hostQuery);
+    cv::gpu::GpuMat train(hostTrain);
+    cv::gpu::GpuMat trainIdx, distance, nMatches;
+
+    cv::gpu::BFMatcher_GPU matcher(cv::NORM_L2);
+
+    declare.time(3.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        matcher.radiusMatchSingle(query, train, trainIdx, distance, nMatches, 2.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Features2D, BruteForceMatcher_radiusMatch, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(64, 128, 256)));
+
+//////////////////////////////////////////////////////////////////////
+// SURF
+
+GPU_PERF_TEST_1(SURF, cv::gpu::DeviceInfo)
+{
+    // Activate the CUDA device under test before anything is allocated on it.
+    cv::gpu::DeviceInfo device = GetParam();
+    cv::gpu::setDevice(device.deviceID());
+
+    // Grayscale input image; the test stops here if it could not be loaded.
+    cv::Mat hostImage = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(hostImage.empty());
+
+    cv::gpu::GpuMat image(hostImage);
+    cv::gpu::GpuMat keypoints, descriptors;
+
+    cv::gpu::SURF_GPU detector;
+
+    declare.time(2.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        detector(image, cv::gpu::GpuMat(), keypoints, descriptors);  // second argument: empty GpuMat
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Features2D, SURF, DEVICES(cv::gpu::GLOBAL_ATOMICS));
+
+//////////////////////////////////////////////////////////////////////
+// FAST
+
+GPU_PERF_TEST_1(FAST, cv::gpu::DeviceInfo)
+{
+    // Activate the CUDA device under test before anything is allocated on it.
+    cv::gpu::DeviceInfo devInfo = GetParam();
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    // Grayscale input image; the test stops here if it could not be loaded.
+    cv::Mat img_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_host.empty());
+
+    cv::gpu::GpuMat img(img_host);
+    cv::gpu::GpuMat keypoints;  // only keypoints are produced by the call below; no descriptor buffer is needed
+
+    cv::gpu::FAST_GPU fastGPU(20);
+
+    TEST_CYCLE()
+    {
+        fastGPU(img, cv::gpu::GpuMat(), keypoints);  // second argument: empty GpuMat
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Features2D, FAST, DEVICES(cv::gpu::GLOBAL_ATOMICS));
+
+//////////////////////////////////////////////////////////////////////
+// ORB
+
+GPU_PERF_TEST_1(ORB, cv::gpu::DeviceInfo)
+{
+    // Activate the CUDA device under test before anything is allocated on it.
+    cv::gpu::DeviceInfo device = GetParam();
+    cv::gpu::setDevice(device.deviceID());
+
+    // Grayscale input image; the test stops here if it could not be loaded.
+    cv::Mat hostImage = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(hostImage.empty());
+
+    cv::gpu::GpuMat image(hostImage);
+    cv::gpu::GpuMat keypoints, descriptors;
+
+    cv::gpu::ORB_GPU orb(4000);
+
+    TEST_CYCLE()
+    {
+        orb(image, cv::gpu::GpuMat(), keypoints, descriptors);  // second argument: empty GpuMat
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Features2D, ORB, DEVICES(cv::gpu::GLOBAL_ATOMICS));
+
+#endif
diff --git a/modules/gpu/perf_cpu/perf_filters.cpp b/modules/gpu/perf_cpu/perf_filters.cpp
new file mode 100644
index 0000000000..9322557283
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_filters.cpp
@@ -0,0 +1,144 @@
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+//////////////////////////////////////////////////////////////////////
+// BoxFilter
+
+GPU_PERF_TEST(BoxFilter, cv::gpu::DeviceInfo, cv::Size, perf::MatType, int)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int type = GET_PARAM(2);
+    const int ksize = GET_PARAM(3);
+
+    // Host source of the requested size and type, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, type);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat dst;
+
+    cv::Ptr<cv::gpu::FilterEngine_GPU> boxFilter = cv::gpu::createBoxFilter_GPU(type, type, cv::Size(ksize, ksize));
+
+    TEST_CYCLE()
+    {
+        boxFilter->apply(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Filter, BoxFilter, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8UC1, CV_8UC4),
+    testing::Values(3, 5)));
+
+//////////////////////////////////////////////////////////////////////
+// MorphologyFilter
+
+GPU_PERF_TEST(MorphologyFilter, cv::gpu::DeviceInfo, cv::Size, perf::MatType, MorphOp, int)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int type = GET_PARAM(2);
+    const int op = GET_PARAM(3);
+    const int ksize = GET_PARAM(4);
+
+    // Host source of the requested size and type, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, type);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat dst;
+
+    cv::Ptr<cv::gpu::FilterEngine_GPU> morphFilter = cv::gpu::createMorphologyFilter_GPU(op, type, cv::Mat::ones(ksize, ksize, CV_8U));
+
+    TEST_CYCLE()
+    {
+        morphFilter->apply(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Filter, MorphologyFilter, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8UC1, CV_8UC4),
+    testing::Values((int) cv::MORPH_ERODE, (int) cv::MORPH_DILATE),
+    testing::Values(3, 5)));
+
+//////////////////////////////////////////////////////////////////////
+// LinearFilter
+
+GPU_PERF_TEST(LinearFilter, cv::gpu::DeviceInfo, cv::Size, perf::MatType, int)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int type = GET_PARAM(2);
+    const int ksize = GET_PARAM(3);
+
+    // Host source of the requested size and type, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, type);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat dst;
+
+    cv::Ptr<cv::gpu::FilterEngine_GPU> linearFilter = cv::gpu::createLinearFilter_GPU(type, type, cv::Mat::ones(ksize, ksize, CV_8U));
+
+    declare.time(1.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        linearFilter->apply(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Filter, LinearFilter, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8UC1, CV_8UC4, CV_32FC1),
+    testing::Values(3, 5, 7, 9)));
+
+//////////////////////////////////////////////////////////////////////
+// SeparableLinearFilter
+
+GPU_PERF_TEST(SeparableLinearFilter, cv::gpu::DeviceInfo, cv::Size, perf::MatType, int)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int type = GET_PARAM(2);
+    const int ksize = GET_PARAM(3);
+
+    // Host source of the requested size and type, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, type);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat dst;
+
+    cv::Mat gaussian = cv::getGaussianKernel(ksize, 0.5, CV_32F);  // same kernel is used for rows and columns
+    cv::Ptr<cv::gpu::FilterEngine_GPU> sepFilter = cv::gpu::createSeparableLinearFilter_GPU(type, type, gaussian, gaussian);
+
+    declare.time(1.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        sepFilter->apply(src, dst, cv::Rect(0, 0, src.cols, src.rows));
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Filter, SeparableLinearFilter, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8UC1, CV_8UC4, CV_32FC1),
+    testing::Values(3, 5, 7, 9, 11, 13, 15)));
+
+#endif
diff --git a/modules/gpu/perf_cpu/perf_imgproc.cpp b/modules/gpu/perf_cpu/perf_imgproc.cpp
new file mode 100644
index 0000000000..5472acf85f
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_imgproc.cpp
@@ -0,0 +1,1023 @@
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+//////////////////////////////////////////////////////////////////////
+// Remap
+
+GPU_PERF_TEST(Remap, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation, BorderMode)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int type = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
+    const int borderMode = GET_PARAM(4);
+
+    // Source image plus two single-channel float coordinate maps, declared as inputs with WARMUP_RNG.
+    cv::Mat hostSrc(size, type);
+    cv::Mat hostMapX(size, CV_32FC1);
+    cv::Mat hostMapY(size, CV_32FC1);
+    declare.in(hostSrc, hostMapX, hostMapY, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat mapX(hostMapX);
+    cv::gpu::GpuMat mapY(hostMapY);
+    cv::gpu::GpuMat dst;
+
+    declare.time(3.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        cv::gpu::remap(src, dst, mapX, mapY, interpolation, borderMode);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Remap, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32FC1),
+    testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC),
+    testing::Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_CONSTANT)));
+
+//////////////////////////////////////////////////////////////////////
+// MeanShiftFiltering
+
+GPU_PERF_TEST_1(MeanShiftFiltering, cv::gpu::DeviceInfo)
+{
+    // Activate the CUDA device under test before anything is allocated on it.
+    cv::gpu::DeviceInfo device = GetParam();
+    cv::gpu::setDevice(device.deviceID());
+
+    cv::Mat bgr = readImage("gpu/meanshift/cones.png");
+    ASSERT_FALSE(bgr.empty());
+
+    // Convert to 4-channel BGRA on the host, then upload.
+    cv::Mat bgra;
+    cv::cvtColor(bgr, bgra, cv::COLOR_BGR2BGRA);
+    cv::gpu::GpuMat src(bgra);
+    cv::gpu::GpuMat dst;
+
+    declare.time(5.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        cv::gpu::meanShiftFiltering(src, dst, 50, 50);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftFiltering, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////
+// MeanShiftProc
+
+GPU_PERF_TEST_1(MeanShiftProc, cv::gpu::DeviceInfo)
+{
+    // Activate the CUDA device under test before anything is allocated on it.
+    cv::gpu::DeviceInfo device = GetParam();
+    cv::gpu::setDevice(device.deviceID());
+
+    cv::Mat bgr = readImage("gpu/meanshift/cones.png");
+    ASSERT_FALSE(bgr.empty());
+
+    // Convert to 4-channel BGRA on the host, then upload.
+    cv::Mat bgra;
+    cv::cvtColor(bgr, bgra, cv::COLOR_BGR2BGRA);
+    cv::gpu::GpuMat src(bgra);
+    cv::gpu::GpuMat dstColor;
+    cv::gpu::GpuMat dstPoints;
+
+    declare.time(5.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        cv::gpu::meanShiftProc(src, dstColor, dstPoints, 50, 50);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftProc, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////
+// MeanShiftSegmentation
+
+GPU_PERF_TEST_1(MeanShiftSegmentation, cv::gpu::DeviceInfo)
+{
+    // Activate the CUDA device under test before anything is allocated on it.
+    cv::gpu::DeviceInfo devInfo = GetParam();
+    cv::gpu::setDevice(devInfo.deviceID());
+
+    cv::Mat img = readImage("gpu/meanshift/cones.png");
+    ASSERT_FALSE(img.empty());
+
+    // Convert to 4-channel BGRA on the host, then upload; the result is written to a host cv::Mat.
+    cv::Mat rgba;
+    cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA);
+    cv::gpu::GpuMat src(rgba);
+    cv::Mat dst;
+
+    declare.time(5.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        cv::gpu::meanShiftSegmentation(src, dst, 10, 10, 20);  // qualified explicitly instead of relying on ADL
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MeanShiftSegmentation, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////
+// DrawColorDisp
+
+GPU_PERF_TEST(DrawColorDisp, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int type = GET_PARAM(2);
+
+    // Host source populated through fill() (a helper not defined in this file) with bounds 0 and 255.
+    cv::Mat hostSrc(size, type);
+    fill(hostSrc, 0, 255);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::drawColorDisp(src, dst, 255);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, DrawColorDisp, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8UC1, CV_16SC1)));
+
+//////////////////////////////////////////////////////////////////////
+// ReprojectImageTo3D
+
+GPU_PERF_TEST(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int type = GET_PARAM(2);
+
+    // Host source of the requested size and type, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, type);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::reprojectImageTo3D(src, dst, cv::Mat::ones(4, 4, CV_32FC1));  // 4x4 all-ones matrix as third argument
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, ReprojectImageTo3D, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8UC1, CV_16SC1)));
+
+//////////////////////////////////////////////////////////////////////
+// CvtColor
+
+GPU_PERF_TEST(CvtColor, cv::gpu::DeviceInfo, cv::Size, perf::MatType, CvtColorInfo)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int depth = GET_PARAM(2);
+    CvtColorInfo info = GET_PARAM(3);
+
+    // The MatType parameter is used as the depth argument of CV_MAKETYPE; info.scn supplies the channel count.
+    cv::Mat hostSrc(size, CV_MAKETYPE(depth, info.scn));
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::cvtColor(src, dst, info.code, info.dcn);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CvtColor, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8UC1, CV_16UC1, CV_32FC1),
+    testing::Values(
+        CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA), CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY), CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA),
+        CvtColorInfo(4, 4, cv::COLOR_BGR2XYZ), CvtColorInfo(4, 4, cv::COLOR_BGR2YCrCb), CvtColorInfo(4, 4, cv::COLOR_YCrCb2BGR),
+        CvtColorInfo(4, 4, cv::COLOR_BGR2HSV), CvtColorInfo(4, 4, cv::COLOR_HSV2BGR))));
+
+//////////////////////////////////////////////////////////////////////
+// SwapChannels
+
+GPU_PERF_TEST(SwapChannels, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Activate the CUDA device under test, then read the image size.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+
+    // 4-channel 8-bit host image, declared as input with WARMUP_RNG.
+    cv::Mat hostImage(size, CV_8UC4);
+    declare.in(hostImage, WARMUP_RNG);
+
+    cv::gpu::GpuMat image(hostImage);
+
+    const int channelOrder[] = {2, 1, 0, 3};  // channels 0 and 2 trade places, 1 and 3 stay
+
+    TEST_CYCLE()
+    {
+        cv::gpu::swapChannels(image, channelOrder);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, SwapChannels, testing::Combine(ALL_DEVICES, GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Threshold
+
+GPU_PERF_TEST(Threshold, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int type = GET_PARAM(2);
+
+    // Host source of the requested size and type, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, type);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat dst(size, type);  // destination is allocated up front, same size and type as the source
+
+    TEST_CYCLE()
+    {
+        cv::gpu::threshold(src, dst, 100.0, 255.0, cv::THRESH_BINARY);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Threshold, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// Resize
+
+GPU_PERF_TEST(Resize, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation, double)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int type = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
+    const double scale = GET_PARAM(4);
+
+    // Host source of the requested size and type, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, type);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat dst;
+
+    declare.time(1.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        cv::gpu::resize(src, dst, cv::Size(), scale, scale, interpolation);  // empty dsize; same factor on both axes
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(perf::szSXGA, perf::sz1080p),
+    testing::Values(CV_8UC1, CV_8UC4, CV_16UC1, CV_32FC1),
+    testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC),
+    testing::Values(0.5, 2.0)));
+
+//////////////////////////////////////////////////////////////////////
+// WarpAffine
+
+GPU_PERF_TEST(WarpAffine, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int interpolation = GET_PARAM(3);
+
+    // Host source of the requested size and type, declared as input with WARMUP_RNG.
+    cv::Mat src_host(size, type);
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    const double alpha = CV_PI / 4;  // rotation angle: 45 degrees
+    double mat[2][3] = { {std::cos(alpha), -std::sin(alpha), src.cols / 2.0},  // x shift computed in double: no int->double narrowing
+                         {std::sin(alpha),  std::cos(alpha), 0.0} };
+    cv::Mat M(2, 3, CV_64F, (void*) mat);  // 2x3 matrix header constructed over the local array
+
+    TEST_CYCLE()
+    {
+        cv::gpu::warpAffine(src, dst, M, size, interpolation, cv::BORDER_CONSTANT, cv::Scalar());
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, WarpAffine, testing::Combine(
+                        ALL_DEVICES,
+                        GPU_TYPICAL_MAT_SIZES,
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
+
+//////////////////////////////////////////////////////////////////////
+// WarpPerspective
+
+GPU_PERF_TEST(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::gpu::setDevice(devInfo.deviceID());
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int interpolation = GET_PARAM(3);
+
+    // Host source of the requested size and type, declared as input with WARMUP_RNG.
+    cv::Mat src_host(size, type);
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    const double alpha = CV_PI / 4;  // rotation angle: 45 degrees
+    double mat[3][3] = { {std::cos(alpha), -std::sin(alpha), src.cols / 2.0},  // x shift computed in double: no int->double narrowing
+                         {std::sin(alpha),  std::cos(alpha), 0.0},
+                         {0.0,              0.0,             1.0} };
+    cv::Mat M(3, 3, CV_64F, (void*) mat);  // 3x3 matrix header constructed over the local array
+
+    TEST_CYCLE()
+    {
+        cv::gpu::warpPerspective(src, dst, M, size, interpolation, cv::BORDER_CONSTANT, cv::Scalar());
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, WarpPerspective, testing::Combine(
+                        ALL_DEVICES,
+                        GPU_TYPICAL_MAT_SIZES,
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+                        testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
+
+//////////////////////////////////////////////////////////////////////
+// BuildWarpPlaneMaps
+
+GPU_PERF_TEST(BuildWarpPlaneMaps, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Activate the CUDA device under test, then read the source size (also used for the full-image rectangle).
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+
+    cv::gpu::GpuMat mapX;
+    cv::gpu::GpuMat mapY;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height),
+                                    cv::Mat::eye(3, 3, CV_32FC1), cv::Mat::ones(3, 3, CV_32FC1), cv::Mat::zeros(1, 3, CV_32F), 1.0, mapX, mapY);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpPlaneMaps, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// BuildWarpCylindricalMaps
+
+GPU_PERF_TEST(BuildWarpCylindricalMaps, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Activate the CUDA device under test, then read the source size (also used for the full-image rectangle).
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+
+    cv::gpu::GpuMat mapX;
+    cv::gpu::GpuMat mapY;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height),
+                                          cv::Mat::eye(3, 3, CV_32FC1), cv::Mat::ones(3, 3, CV_32FC1), 1.0, mapX, mapY);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpCylindricalMaps, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// BuildWarpSphericalMaps
+
+GPU_PERF_TEST(BuildWarpSphericalMaps, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Activate the CUDA device under test, then read the source size (also used for the full-image rectangle).
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+
+    cv::gpu::GpuMat mapX;
+    cv::gpu::GpuMat mapY;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height),
+                                        cv::Mat::eye(3, 3, CV_32FC1), cv::Mat::ones(3, 3, CV_32FC1), 1.0, mapX, mapY);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, BuildWarpSphericalMaps, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Rotate
+
+GPU_PERF_TEST(Rotate, cv::gpu::DeviceInfo, cv::Size, perf::MatType, Interpolation)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int type = GET_PARAM(2);
+    const int interpolation = GET_PARAM(3);
+
+    // Host source of the requested size and type, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, type);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::rotate(src, dst, size, 30.0, 0, 0, interpolation);  // output keeps the source size
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Rotate, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+    testing::Values((int) cv::INTER_NEAREST, (int) cv::INTER_LINEAR, (int) cv::INTER_CUBIC)));
+
+//////////////////////////////////////////////////////////////////////
+// CopyMakeBorder
+
+GPU_PERF_TEST(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, perf::MatType, BorderMode)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int type = GET_PARAM(2);
+    const int borderType = GET_PARAM(3);
+
+    // Host source of the requested size and type, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, type);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::copyMakeBorder(src, dst, 5, 5, 5, 5, borderType);  // 5 passed for each of the four border widths
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(CV_8UC1, CV_8UC4, CV_32FC1),
+    testing::Values((int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT, (int) cv::BORDER_WRAP, (int) cv::BORDER_CONSTANT)));
+
+//////////////////////////////////////////////////////////////////////
+// Integral
+
+GPU_PERF_TEST(Integral, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Activate the CUDA device under test, then read the image size.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+
+    // Single-channel 8-bit host image, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, CV_8UC1);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat sum;
+    cv::gpu::GpuMat scratch;  // buffer argument, declared once and passed on every iteration
+
+    TEST_CYCLE()
+    {
+        cv::gpu::integralBuffered(src, sum, scratch);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Integral, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// IntegralSqr
+
+GPU_PERF_TEST(IntegralSqr, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Activate the CUDA device under test, then read the image size.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+
+    // Single-channel 8-bit host image, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, CV_8UC1);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat sqsum;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::sqrIntegral(src, sqsum);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, IntegralSqr, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// ColumnSum
+
+GPU_PERF_TEST(ColumnSum, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Activate the CUDA device under test, then read the image size.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+
+    // Single-channel float host image, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, CV_32FC1);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat sums;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::columnSum(src, sums);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, ColumnSum, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// CornerHarris
+
+GPU_PERF_TEST(CornerHarris, cv::gpu::DeviceInfo, perf::MatType)
+{
+    // Activate the CUDA device under test, then read the matrix type to benchmark.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const int type = GET_PARAM(1);
+
+    cv::Mat image = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
+
+    const double scale = (type == CV_32F) ? 1.0 / 255.0 : 1.0;  // float input is scaled by 1/255, other types are left as is
+    image.convertTo(image, type, scale);
+
+    cv::gpu::GpuMat src(image);
+    cv::gpu::GpuMat dst;
+    cv::gpu::GpuMat Dx;
+    cv::gpu::GpuMat Dy;
+
+    const int blockSize = 3, ksize = 7;
+    const double k = 0.5;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::cornerHarris(src, dst, Dx, Dy, blockSize, ksize, k);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(CV_8UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// CornerMinEigenVal
+
+GPU_PERF_TEST(CornerMinEigenVal, cv::gpu::DeviceInfo, perf::MatType)
+{
+    // Activate the CUDA device under test, then read the matrix type to benchmark.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const int type = GET_PARAM(1);
+
+    cv::Mat image = readImage("gpu/stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
+
+    const double scale = (type == CV_32F) ? 1.0 / 255.0 : 1.0;  // float input is scaled by 1/255, other types are left as is
+    image.convertTo(image, type, scale);
+
+    cv::gpu::GpuMat src(image);
+    cv::gpu::GpuMat dst;
+    cv::gpu::GpuMat Dx;
+    cv::gpu::GpuMat Dy;
+
+    const int blockSize = 3, ksize = 7;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::cornerMinEigenVal(src, dst, Dx, Dy, blockSize, ksize);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigenVal, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(CV_8UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// MulSpectrums
+
+GPU_PERF_TEST(MulSpectrums, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Activate the CUDA device under test, then read the matrix size.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+
+    // Two 2-channel float host matrices, declared as inputs with WARMUP_RNG.
+    cv::Mat hostA(size, CV_32FC2);
+    cv::Mat hostB(size, CV_32FC2);
+    declare.in(hostA, hostB, WARMUP_RNG);
+
+    cv::gpu::GpuMat specA(hostA);
+    cv::gpu::GpuMat specB(hostB);
+    cv::gpu::GpuMat product;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::mulSpectrums(specA, specB, product, 0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MulSpectrums, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Dft
+
+GPU_PERF_TEST(Dft, cv::gpu::DeviceInfo, cv::Size)
+{
+    // Activate the CUDA device under test, then read the matrix size.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+
+    // 2-channel float host matrix, declared as input with WARMUP_RNG.
+    cv::Mat hostSrc(size, CV_32FC2);
+    declare.in(hostSrc, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(hostSrc);
+    cv::gpu::GpuMat spectrum;
+
+    declare.time(2.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        cv::gpu::dft(src, spectrum, size);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Dft, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// Convolve
+
+GPU_PERF_TEST(Convolve, cv::gpu::DeviceInfo, cv::Size, int, bool)
+{
+    // Activate the CUDA device under test, then unpack the remaining parameters.
+    cv::gpu::DeviceInfo device = GET_PARAM(0);
+    cv::gpu::setDevice(device.deviceID());
+    const cv::Size size = GET_PARAM(1);
+    const int templSize = GET_PARAM(2);
+    const bool ccorr = GET_PARAM(3);
+
+    // Image and square template come from createContinuous() and are set to 1.0 on the device.
+    cv::gpu::GpuMat image = cv::gpu::createContinuous(size, CV_32FC1);
+    image.setTo(cv::Scalar(1.0));
+    cv::gpu::GpuMat templ = cv::gpu::createContinuous(templSize, templSize, CV_32FC1);
+    templ.setTo(cv::Scalar(1.0));
+
+    cv::gpu::GpuMat result;
+    cv::gpu::ConvolveBuf scratch;
+
+    declare.time(2.0);  // time limit handed to the perf framework
+
+    TEST_CYCLE()
+    {
+        cv::gpu::convolve(image, templ, result, ccorr, scratch);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Convolve, testing::Combine(
+    ALL_DEVICES,
+    GPU_TYPICAL_MAT_SIZES,
+    testing::Values(3, 9, 27, 32, 64),
+    testing::Bool()));
+
+//////////////////////////////////////////////////////////////////////
+// PyrDown: times cv::gpu::pyrDown on a matrix of each listed type, uploaded from a host Mat declared with WARMUP_RNG
+
+GPU_PERF_TEST(PyrDown, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::pyrDown(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, PyrDown, testing::Combine(
+                        ALL_DEVICES,
+                        GPU_TYPICAL_MAT_SIZES,
+                        testing::Values(CV_8UC1, CV_8UC4, CV_16SC3, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// PyrUp: times cv::gpu::pyrUp on a matrix of each listed type, uploaded from a host Mat declared with WARMUP_RNG
+
+GPU_PERF_TEST(PyrUp, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::pyrUp(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::Combine(
+                        ALL_DEVICES,
+                        GPU_TYPICAL_MAT_SIZES,
+                        testing::Values(CV_8UC1, CV_8UC4, CV_16SC3, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// BlendLinear: times cv::gpu::blendLinear of two same-typed images using two constant 0.5 CV_32FC1 weight maps
+
+GPU_PERF_TEST(BlendLinear, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat img1_host(size, type);
+    cv::Mat img2_host(size, type);
+
+    declare.in(img1_host, img2_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat img1(img1_host);
+    cv::gpu::GpuMat img2(img2_host);
+    cv::gpu::GpuMat weights1(size, CV_32FC1, cv::Scalar::all(0.5));
+    cv::gpu::GpuMat weights2(size, CV_32FC1, cv::Scalar::all(0.5));
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::blendLinear(img1, img2, weights1, weights2, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, BlendLinear, testing::Combine(
+                        ALL_DEVICES,
+                        GPU_TYPICAL_MAT_SIZES,
+                        testing::Values(CV_8UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// AlphaComp: times cv::gpu::alphaComp for each listed four-channel type and each listed alpha operation
+
+GPU_PERF_TEST(AlphaComp, cv::gpu::DeviceInfo, cv::Size, perf::MatType, AlphaOp)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+    int alpha_op = GET_PARAM(3);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat img1_host(size, type);
+    cv::Mat img2_host(size, type);
+
+    declare.in(img1_host, img2_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat img1(img1_host);
+    cv::gpu::GpuMat img2(img2_host);
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::alphaComp(img1, img2, dst, alpha_op);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, AlphaComp, testing::Combine(
+                        ALL_DEVICES,
+                        GPU_TYPICAL_MAT_SIZES,
+                        testing::Values(CV_8UC4, CV_16UC4, CV_32SC4, CV_32FC4),
+                        testing::Values((int)cv::gpu::ALPHA_OVER, (int)cv::gpu::ALPHA_IN, (int)cv::gpu::ALPHA_OUT, (int)cv::gpu::ALPHA_ATOP, (int)cv::gpu::ALPHA_XOR, (int)cv::gpu::ALPHA_PLUS, (int)cv::gpu::ALPHA_OVER_PREMUL, (int)cv::gpu::ALPHA_IN_PREMUL, (int)cv::gpu::ALPHA_OUT_PREMUL, (int)cv::gpu::ALPHA_ATOP_PREMUL, (int)cv::gpu::ALPHA_XOR_PREMUL, (int)cv::gpu::ALPHA_PLUS_PREMUL, (int)cv::gpu::ALPHA_PREMUL)));
+
+//////////////////////////////////////////////////////////////////////
+// Canny: times cv::gpu::Canny (thresholds 50.0 and 100.0) on perf/1280x1024.jpg loaded as grayscale
+
+GPU_PERF_TEST_1(Canny, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat image_host = readImage("perf/1280x1024.jpg", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image_host.empty()); // stop here if the image could not be loaded
+
+    cv::gpu::GpuMat image(image_host);
+    cv::gpu::GpuMat dst;
+    cv::gpu::CannyBuf buf; // created once and passed to every timed call
+
+    TEST_CYCLE()
+    {
+        cv::gpu::Canny(image, buf, dst, 50.0, 100.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, Canny, ALL_DEVICES);
+
+//////////////////////////////////////////////////////////////////////
+// CalcHist: times cv::gpu::calcHist on a CV_8UC1 matrix uploaded from a host Mat declared with WARMUP_RNG
+
+GPU_PERF_TEST(CalcHist, cv::gpu::DeviceInfo, cv::Size)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat src_host(size, CV_8UC1);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat hist;
+    cv::gpu::GpuMat buf; // created once and passed to every timed call
+
+    TEST_CYCLE()
+    {
+        cv::gpu::calcHist(src, hist, buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, CalcHist, testing::Combine(
+                        ALL_DEVICES,
+                        GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// EqualizeHist: times cv::gpu::equalizeHist on a CV_8UC1 matrix uploaded from a host Mat declared with WARMUP_RNG
+
+GPU_PERF_TEST(EqualizeHist, cv::gpu::DeviceInfo, cv::Size)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat src_host(size, CV_8UC1);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+    cv::gpu::GpuMat hist; // hist and buf are created once and passed to every timed call
+    cv::gpu::GpuMat buf;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::equalizeHist(src, dst, hist, buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, EqualizeHist, testing::Combine(
+                        ALL_DEVICES,
+                        GPU_TYPICAL_MAT_SIZES));
+
+//////////////////////////////////////////////////////////////////////
+// ImagePyramid: the first test times ImagePyramid::build(src, 5); the second times ImagePyramid::getLayer
+
+GPU_PERF_TEST(ImagePyramid_build, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+
+    cv::gpu::ImagePyramid pyr; // the same pyramid object is rebuilt on every timed iteration
+
+    TEST_CYCLE()
+    {
+        pyr.build(src, 5);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, ImagePyramid_build, testing::Combine(
+                        ALL_DEVICES,
+                        GPU_TYPICAL_MAT_SIZES,
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+
+GPU_PERF_TEST(ImagePyramid_getLayer, cv::gpu::DeviceInfo, cv::Size, perf::MatType) // times ImagePyramid::getLayer only
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat src_host(size, type);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+
+    cv::gpu::ImagePyramid pyr(src, 3); // constructed once, outside the timed loop
+
+    TEST_CYCLE()
+    {
+        pyr.getLayer(dst, cv::Size(size.width / 2 + 10, size.height / 2 + 10)); // requested size: half of each dimension plus 10
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, ImagePyramid_getLayer, testing::Combine(
+                        ALL_DEVICES,
+                        GPU_TYPICAL_MAT_SIZES,
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+
+#endif
diff --git a/modules/gpu/perf_cpu/perf_main.cpp b/modules/gpu/perf_cpu/perf_main.cpp
new file mode 100644
index 0000000000..0cd4002775
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_main.cpp
@@ -0,0 +1,20 @@
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_CUDA // CUDA build: initialise Google Test and the perf framework, then run every test
+
+int main(int argc, char **argv)
+{
+    testing::InitGoogleTest(&argc, argv);
+    perf::TestBase::Init(argc, argv);
+    return RUN_ALL_TESTS(); // exit code is whatever RUN_ALL_TESTS reports
+}
+
+#else // no CUDA: print a notice and exit with status 0
+
+int main()
+{
+    printf("OpenCV was built without CUDA support\n");
+    return 0;
+}
+
+#endif // HAVE_CUDA
diff --git a/modules/gpu/perf_cpu/perf_matop.cpp b/modules/gpu/perf_cpu/perf_matop.cpp
new file mode 100644
index 0000000000..f56dbf3e5f
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_matop.cpp
@@ -0,0 +1,185 @@
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+//////////////////////////////////////////////////////////////////////
+// Merge: times cv::gpu::merge of four single-channel matrices, matrix i being filled with the constant i
+
+GPU_PERF_TEST(Merge, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    const int num_channels = 4;
+
+    std::vector<cv::gpu::GpuMat> src(num_channels);
+    for (int i = 0; i < num_channels; ++i)
+        src[i] = cv::gpu::GpuMat(size, type, cv::Scalar::all(i)); 
+
+    cv::gpu::GpuMat dst;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::merge(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, Merge, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// Split: times cv::gpu::split of a four-channel matrix into four pre-allocated single-channel matrices
+
+GPU_PERF_TEST(Split, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    const int num_channels = 4;
+
+    cv::gpu::GpuMat src(size, CV_MAKETYPE(type, num_channels), cv::Scalar(1, 2, 3, 4)); // every instantiated type is single-channel
+
+    std::vector<cv::gpu::GpuMat> dst(num_channels);
+    for (int i = 0; i < num_channels; ++i)
+        dst[i] = cv::gpu::GpuMat(size, type); 
+
+    TEST_CYCLE()
+    {
+        cv::gpu::split(src, dst);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, Split, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+
+//////////////////////////////////////////////////////////////////////
+// SetTo: times GpuMat::setTo with the scalar (1, 2, 3, 4) and no mask
+
+GPU_PERF_TEST(SetTo, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::gpu::GpuMat src(size, type);
+    cv::Scalar val(1, 2, 3, 4);
+
+    TEST_CYCLE()
+    {
+        src.setTo(val);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, SetTo, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+
+//////////////////////////////////////////////////////////////////////
+// SetToMasked: times GpuMat::setTo with the scalar (1, 2, 3, 4) under a CV_8UC1 mask produced by fill(mask_host, 0, 2)
+
+GPU_PERF_TEST(SetToMasked, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat src_host(size, type);
+    cv::Mat mask_host(size, CV_8UC1);
+
+    declare.in(src_host, WARMUP_RNG);
+    fill(mask_host, 0, 2); // NOTE(review): presumably yields a 0/1 mask - confirm RNG::fill upper-bound semantics
+
+    cv::gpu::GpuMat src(src_host);
+    cv::Scalar val(1, 2, 3, 4);
+    cv::gpu::GpuMat mask(mask_host);
+    
+    TEST_CYCLE()
+    {
+        src.setTo(val, mask);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, SetToMasked, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+
+//////////////////////////////////////////////////////////////////////
+// CopyToMasked: times GpuMat::copyTo under a CV_8UC1 mask produced by fill(mask_host, 0, 2)
+
+GPU_PERF_TEST(CopyToMasked, cv::gpu::DeviceInfo, cv::Size, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type = GET_PARAM(2);
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat src_host(size, type);
+    cv::Mat mask_host(size, CV_8UC1);
+
+    declare.in(src_host, WARMUP_RNG);
+    fill(mask_host, 0, 2); // NOTE(review): presumably yields a 0/1 mask - confirm RNG::fill upper-bound semantics
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat mask(mask_host);
+    cv::gpu::GpuMat dst;
+    
+    TEST_CYCLE()
+    {
+        src.copyTo(dst, mask);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, CopyToMasked, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4)));
+
+//////////////////////////////////////////////////////////////////////
+// ConvertTo: times GpuMat::convertTo for every (source type, destination type) pair, passing 0.5 and 1.0 as the last two arguments
+
+GPU_PERF_TEST(ConvertTo, cv::gpu::DeviceInfo, cv::Size, perf::MatType, perf::MatType)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    cv::Size size = GET_PARAM(1);
+    int type1 = GET_PARAM(2); // type of the source matrix
+    int type2 = GET_PARAM(3); // type requested from convertTo
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat src_host(size, type1);
+
+    declare.in(src_host, WARMUP_RNG);
+
+    cv::gpu::GpuMat src(src_host);
+    cv::gpu::GpuMat dst;
+    
+    TEST_CYCLE()
+    {
+        src.convertTo(dst, type2, 0.5, 1.0);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(MatOp, ConvertTo, testing::Combine(
+                        ALL_DEVICES, 
+                        GPU_TYPICAL_MAT_SIZES, 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1), 
+                        testing::Values(CV_8UC1, CV_16UC1, CV_32FC1)));
+
+#endif
diff --git a/modules/gpu/perf_cpu/perf_objdetect.cpp b/modules/gpu/perf_cpu/perf_objdetect.cpp
new file mode 100644
index 0000000000..b6c02aaba7
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_objdetect.cpp
@@ -0,0 +1,27 @@
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+GPU_PERF_TEST_1(HOG, cv::gpu::DeviceInfo) // times HOGDescriptor::detectMultiScale with the default people detector
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat img_host = readImage("gpu/hog/road.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(img_host.empty()); // report a missing test image as a failure instead of uploading an empty Mat
+    cv::gpu::GpuMat img(img_host);
+    std::vector<cv::Rect> found_locations;
+
+    cv::gpu::HOGDescriptor hog;
+    hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
+
+    TEST_CYCLE()
+    {
+        hog.detectMultiScale(img, found_locations);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(ObjDetect, HOG, ALL_DEVICES);
+
+#endif
diff --git a/modules/gpu/perf_cpu/perf_precomp.cpp b/modules/gpu/perf_cpu/perf_precomp.cpp
new file mode 100644
index 0000000000..8552ac3d42
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_precomp.cpp
@@ -0,0 +1 @@
+#include "perf_precomp.hpp"
diff --git a/modules/gpu/perf_cpu/perf_precomp.hpp b/modules/gpu/perf_cpu/perf_precomp.hpp
new file mode 100644
index 0000000000..ef2839be51
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_precomp.hpp
@@ -0,0 +1,18 @@
+#ifndef __OPENCV_PERF_PRECOMP_HPP__
+#define __OPENCV_PERF_PRECOMP_HPP__
+
+#include <cstdio>
+#include <iostream>
+#include "cvconfig.h"
+#include "opencv2/ts/ts.hpp"
+#include "opencv2/ts/ts_perf.hpp"
+#include "opencv2/core/core.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/gpu/gpu.hpp"
+#include "perf_utility.hpp"
+
+#if GTEST_CREATE_SHARED_LIBRARY
+#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
+#endif
+
+#endif
diff --git a/modules/gpu/perf_cpu/perf_utility.cpp b/modules/gpu/perf_cpu/perf_utility.cpp
new file mode 100644
index 0000000000..c0b2fadade
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_utility.cpp
@@ -0,0 +1,201 @@
+#include "perf_precomp.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::gpu;
+
+void fill(Mat& m, double a, double b) // fills m through RNG::fill(RNG::UNIFORM, a, b)
+{
+    RNG rng(123456789); // constant seed: a new generator with this seed is created on every call
+    rng.fill(m, RNG::UNIFORM, a, b);
+}
+
+void PrintTo(const CvtColorInfo& info, ostream* os) // writes the name stored at index info.code of the table below
+{
+    static const char* str[] = 
+    {
+        "BGR2BGRA",
+        "BGRA2BGR",
+        "BGR2RGBA",
+        "RGBA2BGR",
+        "BGR2RGB",
+        "BGRA2RGBA",
+
+        "BGR2GRAY",
+        "RGB2GRAY",
+        "GRAY2BGR",
+        "GRAY2BGRA",
+        "BGRA2GRAY",
+        "RGBA2GRAY",
+
+        "BGR2BGR565",
+        "RGB2BGR565",
+        "BGR5652BGR",
+        "BGR5652RGB",
+        "BGRA2BGR565",
+        "RGBA2BGR565",
+        "BGR5652BGRA",
+        "BGR5652RGBA",
+
+        "GRAY2BGR565",
+        "BGR5652GRAY",
+
+        "BGR2BGR555",
+        "RGB2BGR555",
+        "BGR5552BGR",
+        "BGR5552RGB",
+        "BGRA2BGR555",
+        "RGBA2BGR555",
+        "BGR5552BGRA",
+        "BGR5552RGBA",
+
+        "GRAY2BGR555",
+        "BGR5552GRAY",
+
+        "BGR2XYZ",
+        "RGB2XYZ",
+        "XYZ2BGR",
+        "XYZ2RGB",
+
+        "BGR2YCrCb",
+        "RGB2YCrCb",
+        "YCrCb2BGR",
+        "YCrCb2RGB",
+
+        "BGR2HSV",
+        "RGB2HSV",
+
+        0,
+        0,
+
+        0,
+        0,
+
+        0,
+        0,
+        0,
+        0,
+
+        0,
+        0,
+
+        "BGR2HLS",
+        "RGB2HLS",
+
+        "HSV2BGR",
+        "HSV2RGB",
+
+        0,
+        0,
+        0,
+        0,
+        
+        "HLS2BGR",
+        "HLS2RGB",
+
+        0,
+        0,
+        0,
+        0,
+
+        "BGR2HSV_FULL",
+        "RGB2HSV_FULL",
+        "BGR2HLS_FULL",
+        "RGB2HLS_FULL",
+
+        "HSV2BGR_FULL",
+        "HSV2RGB_FULL",
+        "HLS2BGR_FULL",
+        "HLS2RGB_FULL",
+
+        0,
+        0,
+        0,
+        0,
+
+        0,
+        0,
+        0,
+        0,
+
+        "BGR2YUV",
+        "RGB2YUV",
+        "YUV2BGR",
+        "YUV2RGB",
+
+        0,
+        0,
+        0,
+        0,
+
+        0,
+        0,
+        0,
+        0 
+    };
+    // Codes outside the table, or whose entry is null, are printed as a number: streaming a null char* is undefined.
+    if (info.code >= 0 && info.code < int(sizeof(str) / sizeof(str[0])) && str[info.code]) *os << str[info.code]; else *os << info.code;
+}
+
+void cv::gpu::PrintTo(const DeviceInfo& info, ostream* os) // writes the device name to *os
+{
+    *os << info.name();
+}
+
+Mat readImage(const string& fileName, int flags) // reads the image at the path returned by perf::TestBase::getDataPath(fileName)
+{
+    return imread(perf::TestBase::getDataPath(fileName), flags); // callers in this patch check the result with empty()
+}
+
+bool supportFeature(const DeviceInfo& info, FeatureSet feature) // true only if both checks below pass
+{
+    return TargetArchs::builtWith(feature) && info.supports(feature); // build-time check first, then the device's own check
+}
+
+const vector<DeviceInfo>& devices()
+{
+    // Cache of compatible devices, populated on the first call only.
+    static vector<DeviceInfo> compatible;
+    static bool initialized = false;
+
+    if (initialized)
+        return compatible;
+
+    const int total = getCudaEnabledDeviceCount();
+    compatible.reserve(total);
+
+    for (int id = 0; id < total; ++id)
+    {
+        DeviceInfo candidate(id);
+        // Keep only devices for which isCompatible() reports true.
+        if (candidate.isCompatible())
+            compatible.push_back(candidate);
+    }
+
+    initialized = true;
+    return compatible;
+}
+
+vector<DeviceInfo> devices(FeatureSet feature)
+{
+    // Start from the cached list returned by devices().
+    const vector<DeviceInfo>& all = devices();
+    vector<DeviceInfo> matching;
+
+    // Without build support for the feature the result is empty.
+    if (!TargetArchs::builtWith(feature))
+        return matching;
+
+    matching.reserve(all.size());
+
+    typedef vector<DeviceInfo>::const_iterator DevIter;
+    for (DevIter it = all.begin(); it != all.end(); ++it)
+    {
+        if (it->supports(feature))
+            matching.push_back(*it);
+    }
+
+    return matching;
+}
+
+
diff --git a/modules/gpu/perf_cpu/perf_utility.hpp b/modules/gpu/perf_cpu/perf_utility.hpp
new file mode 100644
index 0000000000..f15610b983
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_utility.hpp
@@ -0,0 +1,69 @@
+#ifndef __OPENCV_PERF_GPU_UTILITY_HPP__
+#define __OPENCV_PERF_GPU_UTILITY_HPP__
+
+void fill(cv::Mat& m, double a, double b); // defined in perf_utility.cpp
+
+enum {HORIZONTAL_AXIS = 0, VERTICAL_AXIS = 1, BOTH_AXIS = -1}; // the three values listed by the FlipCode CV_ENUM below
+
+CV_ENUM(MorphOp, cv::MORPH_ERODE, cv::MORPH_DILATE)
+CV_ENUM(BorderMode, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
+CV_ENUM(FlipCode, HORIZONTAL_AXIS, VERTICAL_AXIS, BOTH_AXIS)
+CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
+CV_ENUM(MatchMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED)
+CV_ENUM(NormType, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2)
+CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv::gpu::ALPHA_ATOP, cv::gpu::ALPHA_XOR, cv::gpu::ALPHA_PLUS, cv::gpu::ALPHA_OVER_PREMUL, cv::gpu::ALPHA_IN_PREMUL, cv::gpu::ALPHA_OUT_PREMUL, cv::gpu::ALPHA_ATOP_PREMUL, cv::gpu::ALPHA_XOR_PREMUL, cv::gpu::ALPHA_PLUS_PREMUL, cv::gpu::ALPHA_PREMUL)
+
+struct CvtColorInfo // test parameter bundling two channel counts with a colour-conversion code
+{
+    int scn;  // NOTE(review): presumably the source channel count - confirm against the cvtColor tests
+    int dcn;  // NOTE(review): presumably the destination channel count - confirm against the cvtColor tests
+    int code; // used by PrintTo as an index into its table of conversion names
+
+    explicit CvtColorInfo(int scn_=0, int dcn_=0, int code_=0) : scn(scn_), dcn(dcn_), code(code_) {}
+};
+
+void PrintTo(const CvtColorInfo& info, std::ostream* os); // defined in perf_utility.cpp
+
+namespace cv { namespace gpu // PrintTo is declared in DeviceInfo's own namespace
+{
+    void PrintTo(const cv::gpu::DeviceInfo& info, std::ostream* os); // defined in perf_utility.cpp; prints info.name()
+}}
+
+#define GPU_PERF_TEST(name, ...) /* fixture `name` parameterised by a tr1::tuple of the given types; registers TEST_P(name, perf) and opens PerfTestBody() */ \
+    struct name : perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > \
+    { \
+    public: \
+        name() {} \
+    protected: \
+        void PerfTestBody(); \
+    }; \
+    TEST_P(name, perf){ RunPerfTestBody(); } \
+    void name :: PerfTestBody()
+
+#define GPU_PERF_TEST_1(name, param_type) /* single-parameter variant: the fixture is parameterised by param_type directly, not by a tuple */ \
+    struct name : perf::TestBaseWithParam< param_type > \
+    { \
+    public: \
+        name() {} \
+    protected: \
+        void PerfTestBody(); \
+    }; \
+    TEST_P(name, perf){ RunPerfTestBody(); } \
+    void name :: PerfTestBody()
+
+#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::szSXGA, perf::sz1080p, cv::Size(1800, 1500)) // the three sizes shared by the size-parameterised tests
+
+cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR); // fileName is resolved through perf::TestBase::getDataPath
+
+bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature); // build supports the feature and so does the device
+
+const std::vector<cv::gpu::DeviceInfo>& devices(); // compatible devices; the list is built on the first call and then reused
+
+std::vector<cv::gpu::DeviceInfo> devices(cv::gpu::FeatureSet feature); // subset of devices() that supports the feature
+
+#define ALL_DEVICES testing::ValuesIn(devices()) // parameter generator over devices()
+#define DEVICES(feature) testing::ValuesIn(devices(feature)) // parameter generator over devices(feature)
+
+#define GET_PARAM(k) std::tr1::get< k >(GetParam()) // k-th element of the current test's parameter tuple
+
+#endif // __OPENCV_PERF_GPU_UTILITY_HPP__
diff --git a/modules/gpu/perf_cpu/perf_video.cpp b/modules/gpu/perf_cpu/perf_video.cpp
new file mode 100644
index 0000000000..ff80aabb63
--- /dev/null
+++ b/modules/gpu/perf_cpu/perf_video.cpp
@@ -0,0 +1,261 @@
+#include "perf_precomp.hpp"
+
+#ifdef HAVE_CUDA
+
+//////////////////////////////////////////////////////
+// BroxOpticalFlow: times one BroxOpticalFlow call on two grayscale frames converted to CV_32FC1 with scale 1/255
+
+GPU_PERF_TEST_1(BroxOpticalFlow, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat frame0_host = readImage("gpu/opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
+
+    ASSERT_FALSE(frame0_host.empty());
+    ASSERT_FALSE(frame1_host.empty());
+
+    frame0_host.convertTo(frame0_host, CV_32FC1, 1.0 / 255.0);
+    frame1_host.convertTo(frame1_host, CV_32FC1, 1.0 / 255.0);
+
+    cv::gpu::GpuMat frame0(frame0_host);
+    cv::gpu::GpuMat frame1(frame1_host);
+    cv::gpu::GpuMat u; 
+    cv::gpu::GpuMat v;
+
+    cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, 
+                                    10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
+
+    declare.time(10); // NOTE(review): presumably a time limit in seconds - confirm in ts_perf.hpp
+
+    TEST_CYCLE()
+    {
+        d_flow(frame0, frame1, u, v);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, BroxOpticalFlow, ALL_DEVICES);
+
+//////////////////////////////////////////////////////
+// InterpolateFrames: times cv::gpu::interpolateFrames only; both Brox flows are computed once before the timed loop
+
+GPU_PERF_TEST_1(InterpolateFrames, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat frame0_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    cv::Mat frame1_host = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
+
+    ASSERT_FALSE(frame0_host.empty());
+    ASSERT_FALSE(frame1_host.empty());
+
+    frame0_host.convertTo(frame0_host, CV_32FC1, 1.0 / 255.0);
+    frame1_host.convertTo(frame1_host, CV_32FC1, 1.0 / 255.0);
+
+    cv::gpu::GpuMat frame0(frame0_host);
+    cv::gpu::GpuMat frame1(frame1_host);
+    cv::gpu::GpuMat fu, fv; 
+    cv::gpu::GpuMat bu, bv;
+
+    cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, 
+                                    10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
+    
+    d_flow(frame0, frame1, fu, fv); // flow from frame0 to frame1
+    d_flow(frame1, frame0, bu, bv); // flow from frame1 to frame0
+
+    cv::gpu::GpuMat newFrame;
+    cv::gpu::GpuMat buf;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::interpolateFrames(frame0, frame1, fu, fv, bu, bv, 0.5f, newFrame, buf);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, InterpolateFrames, ALL_DEVICES);
+
+//////////////////////////////////////////////////////
+// CreateOpticalFlowNeedleMap: times cv::gpu::createOpticalFlowNeedleMap only; the Brox flow is computed once before the timed loop
+
+GPU_PERF_TEST_1(CreateOpticalFlowNeedleMap, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat frame0_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+    cv::Mat frame1_host = readImage("gpu/perf/aloeR.jpg", cv::IMREAD_GRAYSCALE);
+
+    ASSERT_FALSE(frame0_host.empty());
+    ASSERT_FALSE(frame1_host.empty());
+
+    frame0_host.convertTo(frame0_host, CV_32FC1, 1.0 / 255.0);
+    frame1_host.convertTo(frame1_host, CV_32FC1, 1.0 / 255.0);
+
+    cv::gpu::GpuMat frame0(frame0_host);
+    cv::gpu::GpuMat frame1(frame1_host);
+    cv::gpu::GpuMat u, v;
+
+    cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, 
+                                    10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
+    
+    d_flow(frame0, frame1, u, v); // produces the u and v inputs for the timed call
+
+    cv::gpu::GpuMat vertex, colors;
+
+    TEST_CYCLE()
+    {
+        cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, CreateOpticalFlowNeedleMap, ALL_DEVICES);
+
+//////////////////////////////////////////////////////
+// GoodFeaturesToTrack: times GoodFeaturesToTrackDetector_GPU on gpu/perf/aloe.jpg for minDistance 0.0 and 3.0
+
+GPU_PERF_TEST(GoodFeaturesToTrack, cv::gpu::DeviceInfo, double)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    double minDistance = GET_PARAM(1);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+    
+    cv::Mat image_host = readImage("gpu/perf/aloe.jpg", cv::IMREAD_GRAYSCALE);
+
+    ASSERT_FALSE(image_host.empty());
+
+    cv::gpu::GoodFeaturesToTrackDetector_GPU detector(8000, 0.01, minDistance); // NOTE(review): 8000 and 0.01 are presumably max corners and quality level - confirm
+
+    cv::gpu::GpuMat image(image_host);
+    cv::gpu::GpuMat pts;
+
+    TEST_CYCLE()
+    {
+        detector(image, pts);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, GoodFeaturesToTrack, testing::Combine(ALL_DEVICES, testing::Values(0.0, 3.0)));
+
+//////////////////////////////////////////////////////
+// PyrLKOpticalFlowSparse: times PyrLKOpticalFlow::sparse on points found beforehand by GoodFeaturesToTrackDetector_GPU
+
+GPU_PERF_TEST(PyrLKOpticalFlowSparse, cv::gpu::DeviceInfo, bool, int, int)
+{
+    cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
+    bool useGray = GET_PARAM(1);
+    int points = GET_PARAM(2);
+    int win_size = GET_PARAM(3);
+
+    cv::gpu::setDevice(devInfo.deviceID());
+    
+    cv::Mat frame0_host = readImage("gpu/opticalflow/frame0.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
+    cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", useGray ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
+
+    ASSERT_FALSE(frame0_host.empty());
+    ASSERT_FALSE(frame1_host.empty());
+
+    cv::Mat gray_frame; // grayscale version of frame0, used only for feature detection
+    if (useGray)
+        gray_frame = frame0_host;
+    else
+        cv::cvtColor(frame0_host, gray_frame, cv::COLOR_BGR2GRAY);
+
+    cv::gpu::GpuMat pts;
+
+    cv::gpu::GoodFeaturesToTrackDetector_GPU detector(points, 0.01, 0.0);
+    detector(cv::gpu::GpuMat(gray_frame), pts); // runs once, outside the timed loop
+
+    cv::gpu::PyrLKOpticalFlow pyrLK;
+    pyrLK.winSize = cv::Size(win_size, win_size);
+
+    cv::gpu::GpuMat frame0(frame0_host);
+    cv::gpu::GpuMat frame1(frame1_host);
+    cv::gpu::GpuMat nextPts;
+    cv::gpu::GpuMat status;
+
+    TEST_CYCLE()
+    {
+        pyrLK.sparse(frame0, frame1, pts, nextPts, status);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowSparse, testing::Combine
+                        (
+                            ALL_DEVICES, 
+                            testing::Bool(), 
+                            testing::Values(1000, 2000, 4000, 8000), 
+                            testing::Values(17, 21)
+                        ));
+
+//////////////////////////////////////////////////////
+// PyrLKOpticalFlowDense: times PyrLKOpticalFlow::dense with a default-constructed object on two grayscale frames
+
+GPU_PERF_TEST_1(PyrLKOpticalFlowDense, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat frame0_host = readImage("gpu/opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
+
+    ASSERT_FALSE(frame0_host.empty());
+    ASSERT_FALSE(frame1_host.empty());
+
+    cv::gpu::GpuMat frame0(frame0_host);
+    cv::gpu::GpuMat frame1(frame1_host);
+    cv::gpu::GpuMat u; 
+    cv::gpu::GpuMat v;
+
+    cv::gpu::PyrLKOpticalFlow pyrLK;
+
+    declare.time(10); // NOTE(review): presumably a time limit in seconds - confirm in ts_perf.hpp
+
+    TEST_CYCLE()
+    {
+        pyrLK.dense(frame0, frame1, u, v);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, PyrLKOpticalFlowDense, ALL_DEVICES);
+
+
+//////////////////////////////////////////////////////
+// FarnebackOpticalFlowTest: times a default-constructed FarnebackOpticalFlow on two grayscale frames
+
+GPU_PERF_TEST_1(FarnebackOpticalFlowTest, cv::gpu::DeviceInfo)
+{
+    cv::gpu::DeviceInfo devInfo = GetParam();
+
+    cv::gpu::setDevice(devInfo.deviceID()); // select the device under test
+
+    cv::Mat frame0_host = readImage("gpu/opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
+    cv::Mat frame1_host = readImage("gpu/opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
+
+    ASSERT_FALSE(frame0_host.empty());
+    ASSERT_FALSE(frame1_host.empty());
+
+    cv::gpu::GpuMat frame0(frame0_host);
+    cv::gpu::GpuMat frame1(frame1_host);
+    cv::gpu::GpuMat u;
+    cv::gpu::GpuMat v;
+
+    cv::gpu::FarnebackOpticalFlow calc;
+
+    declare.time(10); // NOTE(review): presumably a time limit in seconds - confirm in ts_perf.hpp
+
+    TEST_CYCLE()
+    {
+        calc(frame0, frame1, u, v);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(Video, FarnebackOpticalFlowTest, ALL_DEVICES);
+
+#endif