From e895b7455e1efea6aa1fe0c96ab19f2ff6182439 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Wed, 28 Aug 2013 15:45:13 +0400
Subject: [PATCH] renamed gpu namespace -> cuda

---
 modules/core/include/opencv2/core/base.hpp    |   2 +-
 .../core/include/opencv2/core/cuda/block.hpp  |   2 +-
 .../opencv2/core/cuda/border_interpolate.hpp  |   4 +-
 .../core/include/opencv2/core/cuda/color.hpp  |   4 +-
 .../core/include/opencv2/core/cuda/common.hpp |  10 +-
 .../opencv2/core/cuda/datamov_utils.hpp       |   4 +-
 .../opencv2/core/cuda/detail/color_detail.hpp |  70 +-
 .../opencv2/core/cuda/detail/reduce.hpp       |   2 +-
 .../core/cuda/detail/reduce_key_val.hpp       |   2 +-
 .../core/cuda/detail/transform_detail.hpp     |   4 +-
 .../core/cuda/detail/type_traits_detail.hpp   |   4 +-
 .../core/cuda/detail/vec_distance_detail.hpp  |   4 +-
 .../opencv2/core/cuda/dynamic_smem.hpp        |   2 +-
 .../include/opencv2/core/cuda/emulation.hpp   |   4 +-
 .../include/opencv2/core/cuda/filters.hpp     |   4 +-
 .../include/opencv2/core/cuda/funcattrib.hpp  |   4 +-
 .../include/opencv2/core/cuda/functional.hpp  |   4 +-
 .../core/include/opencv2/core/cuda/limits.hpp |   4 +-
 .../core/include/opencv2/core/cuda/reduce.hpp |   2 +-
 .../opencv2/core/cuda/saturate_cast.hpp       |   2 +-
 .../core/include/opencv2/core/cuda/scan.hpp   |   6 +-
 .../opencv2/core/cuda/simd_functions.hpp      |   2 +-
 .../include/opencv2/core/cuda/transform.hpp   |   2 +-
 .../include/opencv2/core/cuda/type_traits.hpp |   2 +-
 .../include/opencv2/core/cuda/utility.hpp     |   4 +-
 .../opencv2/core/cuda/vec_distance.hpp        |   4 +-
 .../include/opencv2/core/cuda/vec_math.hpp    |   4 +-
 .../include/opencv2/core/cuda/vec_traits.hpp  |   4 +-
 .../core/include/opencv2/core/cuda/warp.hpp   |   4 +-
 .../include/opencv2/core/cuda/warp_reduce.hpp |   4 +-
 .../opencv2/core/cuda/warp_shuffle.hpp        |   2 +-
 modules/core/include/opencv2/core/gpu.hpp     |   8 +-
 modules/core/include/opencv2/core/gpu.inl.hpp |   6 +-
 .../opencv2/core/gpu_stream_accessor.hpp      |   2 +-
 .../core/include/opencv2/core/gpu_types.hpp   |   2 +-
 modules/core/include/opencv2/core/mat.hpp     |  20 +-
 modules/core/include/opencv2/core/opengl.hpp  |  12 +-
 .../core/include/opencv2/core/private.gpu.hpp |  12 +-
 modules/core/src/cuda/gpu_mat.cu              |  28 +-
 modules/core/src/gpu_cuda_mem.cpp             |  14 +-
 modules/core/src/gpu_info.cpp                 | 140 ++--
 modules/core/src/gpu_mat.cpp                  |  48 +-
 modules/core/src/gpu_stream.cpp               |  48 +-
 modules/core/src/matrix.cpp                   |  78 +-
 modules/core/src/opengl.cpp                   |  20 +-
 .../cudev/include/opencv2/cudev/common.hpp    |   2 +-
 modules/cudev/test/test_arithm_func.cu        |   2 +-
 modules/cudev/test/test_arithm_op.cu          |   2 +-
 modules/cudev/test/test_bitwize_op.cu         |   2 +-
 modules/cudev/test/test_cmp_op.cu             |   2 +-
 modules/cudev/test/test_color_cvt.cu          |   2 +-
 modules/cudev/test/test_cvt.cu                |   2 +-
 modules/cudev/test/test_deriv.cu              |   2 +-
 modules/cudev/test/test_integral.cu           |   2 +-
 modules/cudev/test/test_lut.cu                |   2 +-
 modules/cudev/test/test_pyramids.cu           |   2 +-
 modules/cudev/test/test_reduction.cu          |   2 +-
 modules/cudev/test/test_split_merge.cu        |   2 +-
 modules/cudev/test/test_warp.cu               |   2 +-
 modules/cudev/test/transpose.cu               |   2 +-
 modules/gpu/include/opencv2/gpu.hpp           |   4 +-
 modules/gpu/perf/perf_calib3d.cpp             |   8 +-
 modules/gpu/perf/perf_labeling.cpp            |  14 +-
 modules/gpu/perf/perf_matop.cpp               |  16 +-
 modules/gpu/perf/perf_objdetect.cpp           |  20 +-
 modules/gpu/perf4au/main.cpp                  |  54 +-
 modules/gpu/src/calib3d.cpp                   |  20 +-
 modules/gpu/src/cascadeclassifier.cpp         |  56 +-
 modules/gpu/src/cuda/calib3d.cu               |   8 +-
 modules/gpu/src/cuda/ccomponetns.cu           |   2 +-
 modules/gpu/src/cuda/global_motion.cu         |   2 +-
 modules/gpu/src/cuda/hog.cu                   |   4 +-
 modules/gpu/src/cuda/lbp.cu                   |   2 +-
 modules/gpu/src/cuda/lbp.hpp                  |   2 +-
 modules/gpu/src/global_motion.cpp             |  16 +-
 modules/gpu/src/graphcuts.cpp                 |  18 +-
 modules/gpu/src/hog.cpp                       |  94 +--
 modules/gpu/test/test_calib3d.cpp             |  28 +-
 modules/gpu/test/test_global_motion.cpp       |   8 +-
 modules/gpu/test/test_gpumat.cpp              |  86 +--
 modules/gpu/test/test_labeling.cpp            |  14 +-
 modules/gpu/test/test_objdetect.cpp           |  56 +-
 modules/gpu/test/test_opengl.cpp              |  14 +-
 modules/gpu/test/test_stream.cpp              |  28 +-
 .../gpuarithm/include/opencv2/gpuarithm.hpp   |   4 +-
 modules/gpuarithm/perf/perf_arithm.cpp        |  54 +-
 modules/gpuarithm/perf/perf_core.cpp          |  46 +-
 .../perf/perf_element_operations.cpp          | 284 ++++----
 modules/gpuarithm/perf/perf_reductions.cpp    |  70 +-
 modules/gpuarithm/src/arithm.cpp              |  36 +-
 modules/gpuarithm/src/core.cpp                |  46 +-
 modules/gpuarithm/src/cuda/absdiff_mat.cu     |   6 +-
 modules/gpuarithm/src/cuda/absdiff_scalar.cu  |   6 +-
 modules/gpuarithm/src/cuda/add_mat.cu         |   6 +-
 modules/gpuarithm/src/cuda/add_scalar.cu      |   6 +-
 modules/gpuarithm/src/cuda/add_weighted.cu    |   6 +-
 modules/gpuarithm/src/cuda/bitwise_mat.cu     |   6 +-
 modules/gpuarithm/src/cuda/bitwise_scalar.cu  |  12 +-
 modules/gpuarithm/src/cuda/cmp_mat.cu         |   6 +-
 modules/gpuarithm/src/cuda/cmp_scalar.cu      |   6 +-
 .../gpuarithm/src/cuda/copy_make_border.cu    |   4 +-
 modules/gpuarithm/src/cuda/countnonzero.cu    |   4 +-
 modules/gpuarithm/src/cuda/div_mat.cu         |   6 +-
 modules/gpuarithm/src/cuda/div_scalar.cu      |   6 +-
 modules/gpuarithm/src/cuda/integral.cu        |   2 +-
 modules/gpuarithm/src/cuda/math.cu            |  16 +-
 modules/gpuarithm/src/cuda/minmax.cu          |   4 +-
 modules/gpuarithm/src/cuda/minmax_mat.cu      |  12 +-
 modules/gpuarithm/src/cuda/minmaxloc.cu       |   4 +-
 modules/gpuarithm/src/cuda/mul_mat.cu         |   6 +-
 modules/gpuarithm/src/cuda/mul_scalar.cu      |   6 +-
 modules/gpuarithm/src/cuda/mul_spectrums.cu   |   4 +-
 modules/gpuarithm/src/cuda/polar_cart.cu      |   4 +-
 modules/gpuarithm/src/cuda/reduce.cu          |   4 +-
 modules/gpuarithm/src/cuda/split_merge.cu     |   4 +-
 modules/gpuarithm/src/cuda/sub_mat.cu         |   6 +-
 modules/gpuarithm/src/cuda/sub_scalar.cu      |   6 +-
 modules/gpuarithm/src/cuda/sum.cu             |   4 +-
 modules/gpuarithm/src/cuda/threshold.cu       |   6 +-
 modules/gpuarithm/src/cuda/transpose.cu       |   4 +-
 modules/gpuarithm/src/cuda/unroll_detail.hpp  |  12 +-
 modules/gpuarithm/src/element_operations.cpp  | 124 ++--
 modules/gpuarithm/src/reductions.cpp          |  76 +-
 modules/gpuarithm/test/test_arithm.cpp        |  82 +--
 modules/gpuarithm/test/test_core.cpp          |  84 +--
 .../test/test_element_operations.cpp          | 670 +++++++++---------
 modules/gpuarithm/test/test_reductions.cpp    | 146 ++--
 .../gpubgsegm/include/opencv2/gpubgsegm.hpp   |  12 +-
 modules/gpubgsegm/perf/perf_bgsegm.cpp        |  34 +-
 modules/gpubgsegm/src/cuda/fgd.cu             |   4 +-
 modules/gpubgsegm/src/cuda/fgd.hpp            |  12 +-
 modules/gpubgsegm/src/cuda/gmg.cu             |   2 +-
 modules/gpubgsegm/src/cuda/mog.cu             |   2 +-
 modules/gpubgsegm/src/cuda/mog2.cu            |   2 +-
 modules/gpubgsegm/src/fgd.cpp                 |  64 +-
 modules/gpubgsegm/src/gmg.cpp                 |  20 +-
 modules/gpubgsegm/src/mog.cpp                 |  14 +-
 modules/gpubgsegm/src/mog2.cpp                |  16 +-
 modules/gpubgsegm/test/test_bgsegm.cpp        |  50 +-
 modules/gpucodec/perf/perf_video.cpp          |   4 +-
 modules/gpucodec/src/cuda/nv12_to_rgb.cu      |   2 +-
 modules/gpucodec/src/cuda/rgb_to_yv12.cu      |   2 +-
 modules/gpucodec/src/video_decoder.hpp        |   6 +-
 modules/gpucodec/src/video_reader.cpp         |   6 +-
 modules/gpucodec/src/video_writer.cpp         |   4 +-
 modules/gpucodec/test/test_video.cpp          |  10 +-
 .../include/opencv2/gpufeatures2d.hpp         |   6 +-
 .../gpufeatures2d/perf/perf_features2d.cpp    |  40 +-
 .../gpufeatures2d/src/brute_force_matcher.cpp | 140 ++--
 modules/gpufeatures2d/src/cuda/bf_knnmatch.cu |   4 +-
 modules/gpufeatures2d/src/cuda/bf_match.cu    |   4 +-
 .../gpufeatures2d/src/cuda/bf_radius_match.cu |   4 +-
 modules/gpufeatures2d/src/cuda/fast.cu        |   2 +-
 modules/gpufeatures2d/src/cuda/orb.cu         |   2 +-
 modules/gpufeatures2d/src/fast.cpp            |  40 +-
 modules/gpufeatures2d/src/orb.cpp             |  76 +-
 .../gpufeatures2d/test/test_features2d.cpp    | 120 ++--
 .../gpufilters/include/opencv2/gpufilters.hpp |   4 +-
 modules/gpufilters/perf/perf_filters.cpp      |  54 +-
 modules/gpufilters/src/cuda/column_filter.hpp |   4 +-
 modules/gpufilters/src/cuda/filter2d.cu       |   2 +-
 modules/gpufilters/src/cuda/row_filter.hpp    |   4 +-
 modules/gpufilters/src/filtering.cpp          | 102 +--
 modules/gpufilters/test/test_filters.cpp      | 100 +--
 .../gpuimgproc/include/opencv2/gpuimgproc.hpp |  16 +-
 .../gpuimgproc/perf/perf_bilateral_filter.cpp |   6 +-
 modules/gpuimgproc/perf/perf_blend.cpp        |  12 +-
 modules/gpuimgproc/perf/perf_canny.cpp        |   6 +-
 modules/gpuimgproc/perf/perf_color.cpp        |  36 +-
 modules/gpuimgproc/perf/perf_corners.cpp      |  12 +-
 modules/gpuimgproc/perf/perf_gftt.cpp         |   6 +-
 modules/gpuimgproc/perf/perf_histogram.cpp    |  36 +-
 modules/gpuimgproc/perf/perf_hough.cpp        |  42 +-
 .../gpuimgproc/perf/perf_match_template.cpp   |  16 +-
 modules/gpuimgproc/perf/perf_mean_shift.cpp   |  18 +-
 modules/gpuimgproc/src/bilateral_filter.cpp   |  10 +-
 modules/gpuimgproc/src/blend.cpp              |  10 +-
 modules/gpuimgproc/src/canny.cpp              |  10 +-
 modules/gpuimgproc/src/color.cpp              | 184 ++---
 modules/gpuimgproc/src/corners.cpp            |  26 +-
 .../gpuimgproc/src/cuda/bilateral_filter.cu   |   6 +-
 modules/gpuimgproc/src/cuda/blend.cu          |   4 +-
 .../gpuimgproc/src/cuda/build_point_list.cu   |   2 +-
 modules/gpuimgproc/src/cuda/canny.cu          |   8 +-
 modules/gpuimgproc/src/cuda/clahe.cu          |   4 +-
 modules/gpuimgproc/src/cuda/color.cu          |   6 +-
 modules/gpuimgproc/src/cuda/corners.cu        |   2 +-
 modules/gpuimgproc/src/cuda/debayer.cu        |   2 +-
 .../gpuimgproc/src/cuda/generalized_hough.cu  |   2 +-
 modules/gpuimgproc/src/cuda/gftt.cu           |   2 +-
 modules/gpuimgproc/src/cuda/hist.cu           |   6 +-
 modules/gpuimgproc/src/cuda/hough_circles.cu  |   2 +-
 modules/gpuimgproc/src/cuda/hough_lines.cu    |   2 +-
 modules/gpuimgproc/src/cuda/hough_segments.cu |   2 +-
 modules/gpuimgproc/src/cuda/match_template.cu |   4 +-
 modules/gpuimgproc/src/cuda/mean_shift.cu     |   2 +-
 modules/gpuimgproc/src/cvt_color_internal.h   |   2 +-
 modules/gpuimgproc/src/generalized_hough.cpp  |  44 +-
 modules/gpuimgproc/src/gftt.cpp               |  18 +-
 modules/gpuimgproc/src/histogram.cpp          |  40 +-
 modules/gpuimgproc/src/hough_circles.cpp      |  24 +-
 modules/gpuimgproc/src/hough_lines.cpp        |  12 +-
 modules/gpuimgproc/src/hough_segments.cpp     |  14 +-
 modules/gpuimgproc/src/match_template.cpp     |  72 +-
 modules/gpuimgproc/src/mean_shift.cpp         |  18 +-
 modules/gpuimgproc/src/mssegmentation.cpp     |   6 +-
 .../gpuimgproc/test/test_bilateral_filter.cpp |  10 +-
 modules/gpuimgproc/test/test_blend.cpp        |  10 +-
 modules/gpuimgproc/test/test_canny.cpp        |  10 +-
 modules/gpuimgproc/test/test_color.cpp        | 554 +++++++--------
 modules/gpuimgproc/test/test_corners.cpp      |  20 +-
 modules/gpuimgproc/test/test_gftt.cpp         |  16 +-
 modules/gpuimgproc/test/test_histogram.cpp    |  40 +-
 modules/gpuimgproc/test/test_hough.cpp        |  30 +-
 .../gpuimgproc/test/test_match_template.cpp   |  54 +-
 modules/gpuimgproc/test/test_mean_shift.cpp   |  34 +-
 .../include/opencv2/gpulegacy/NCVPyramid.hpp  |   2 +-
 .../include/opencv2/gpulegacy/private.hpp     |   6 +-
 modules/gpulegacy/src/NCV.cpp                 |   2 +-
 .../gpulegacy/src/cuda/NCVBroxOpticalFlow.cu  |   4 +-
 .../src/cuda/NCVHaarObjectDetection.cu        |   4 +-
 modules/gpulegacy/src/cuda/NCVPyramid.cu      |   4 +-
 modules/gpulegacy/src/cuda/NPP_staging.cu     |   4 +-
 modules/gpulegacy/test/test_main.cpp          |   2 +-
 modules/gpulegacy/test/test_nvidia.cpp        |   6 +-
 .../gpuoptflow/include/opencv2/gpuoptflow.hpp |   4 +-
 modules/gpuoptflow/perf/perf_optflow.cpp      |  98 +--
 modules/gpuoptflow/src/bm.cpp                 |   6 +-
 modules/gpuoptflow/src/bm_fast.cpp            |  10 +-
 modules/gpuoptflow/src/brox.cpp               |   6 +-
 modules/gpuoptflow/src/cuda/bm.cu             |   4 +-
 modules/gpuoptflow/src/cuda/bm_fast.cu        |   4 +-
 modules/gpuoptflow/src/cuda/farneback.cu      |   4 +-
 modules/gpuoptflow/src/cuda/needle_map.cu     |   2 +-
 modules/gpuoptflow/src/cuda/pyrlk.cu          |   4 +-
 modules/gpuoptflow/src/cuda/tvl1flow.cu       |   4 +-
 modules/gpuoptflow/src/farneback.cpp          |  32 +-
 modules/gpuoptflow/src/interpolate_frames.cpp |   6 +-
 modules/gpuoptflow/src/needle_map.cpp         |  16 +-
 modules/gpuoptflow/src/pyrlk.cpp              |  32 +-
 modules/gpuoptflow/src/tvl1flow.cpp           |  40 +-
 modules/gpuoptflow/test/test_optflow.cpp      |  76 +-
 .../gpustereo/include/opencv2/gpustereo.hpp   |  14 +-
 modules/gpustereo/perf/perf_stereo.cpp        |  44 +-
 .../src/cuda/disparity_bilateral_filter.cu    |   4 +-
 modules/gpustereo/src/cuda/stereobm.cu        |   4 +-
 modules/gpustereo/src/cuda/stereobp.cu        |   4 +-
 modules/gpustereo/src/cuda/stereocsbp.cu      |   4 +-
 modules/gpustereo/src/cuda/util.cu            |   4 +-
 .../src/disparity_bilateral_filter.cpp        |  12 +-
 modules/gpustereo/src/stereobm.cpp            |  18 +-
 modules/gpustereo/src/stereobp.cpp            |  20 +-
 modules/gpustereo/src/stereocsbp.cpp          |  16 +-
 modules/gpustereo/src/util.cpp                |  18 +-
 modules/gpustereo/test/test_stereo.cpp        |  42 +-
 .../gpuwarping/include/opencv2/gpuwarping.hpp |   4 +-
 modules/gpuwarping/perf/perf_warping.cpp      |  76 +-
 .../gpuwarping/src/cuda/build_warp_maps.cu    |   4 +-
 modules/gpuwarping/src/cuda/pyr_down.cu       |   4 +-
 modules/gpuwarping/src/cuda/pyr_up.cu         |   4 +-
 modules/gpuwarping/src/cuda/remap.cu          |   4 +-
 modules/gpuwarping/src/cuda/resize.cu         |   2 +-
 modules/gpuwarping/src/cuda/warp.cu           |   4 +-
 modules/gpuwarping/src/pyramids.cpp           |  26 +-
 modules/gpuwarping/src/remap.cpp              |   8 +-
 modules/gpuwarping/src/resize.cpp             |   6 +-
 modules/gpuwarping/src/warp.cpp               |  58 +-
 modules/gpuwarping/test/test_pyramids.cpp     |  20 +-
 modules/gpuwarping/test/test_remap.cpp        |  10 +-
 modules/gpuwarping/test/test_resize.cpp       |  20 +-
 modules/gpuwarping/test/test_warp_affine.cpp  |  30 +-
 .../gpuwarping/test/test_warp_perspective.cpp |  30 +-
 .../nonfree/include/opencv2/nonfree/gpu.hpp   |   4 +-
 modules/nonfree/perf/perf_gpu.cpp             |   8 +-
 modules/nonfree/src/cuda/surf.cu              |   6 +-
 modules/nonfree/src/surf_gpu.cpp              |  60 +-
 modules/nonfree/test/test_gpu.cpp             |  12 +-
 modules/photo/include/opencv2/photo/gpu.hpp   |   4 +-
 modules/photo/perf/perf_gpu.cpp               |  18 +-
 modules/photo/src/cuda/nlm.cu                 |  14 +-
 modules/photo/src/denoising_gpu.cpp           |  30 +-
 modules/photo/test/test_denoising_gpu.cpp     |  10 +-
 .../include/opencv2/softcascade.hpp           |   4 +-
 .../perf/perf_cuda_softcascade.cpp            |  24 +-
 modules/softcascade/src/cuda/channels.cu      |  26 +-
 modules/softcascade/src/cuda/icf-sc.cu        |  22 +-
 modules/softcascade/src/cuda_invoker.hpp      |   6 +-
 modules/softcascade/src/detector_cuda.cpp     |  96 +--
 .../test/test_cuda_softcascade.cpp            |  18 +-
 modules/softcascade/test/utility.cpp          |   2 +-
 modules/softcascade/test/utility.hpp          |   8 +-
 .../opencv2/stitching/detail/matchers.hpp     |  10 +-
 .../opencv2/stitching/detail/warpers.hpp      |  30 +-
 modules/stitching/src/blenders.cpp            |  20 +-
 modules/stitching/src/matchers.cpp            |   2 +-
 modules/stitching/src/seam_finders.cpp        |  16 +-
 modules/stitching/src/stitcher.cpp            |   2 +-
 modules/stitching/src/warpers.cpp             |  36 +-
 modules/superres/perf/perf_superres.cpp       |   2 +-
 modules/superres/src/btv_l1_gpu.cpp           |  42 +-
 modules/superres/src/cuda/btv_l1_gpu.cu       |   6 +-
 modules/superres/src/frame_source.cpp         |   2 +-
 modules/superres/src/input_array_utility.cpp  |   4 +-
 modules/superres/src/input_array_utility.hpp  |   4 +-
 modules/superres/src/optical_flow.cpp         |   2 +-
 modules/ts/include/opencv2/ts/gpu_test.hpp    |  16 +-
 modules/ts/include/opencv2/ts/ts_perf.hpp     |   8 +-
 modules/ts/src/gpu_perf.cpp                   |   4 +-
 modules/ts/src/gpu_test.cpp                   |   4 +-
 modules/ts/src/ts_perf.cpp                    |  10 +-
 .../opencv2/videostab/global_motion.hpp       |  10 +-
 .../opencv2/videostab/optical_flow.hpp        |  16 +-
 .../opencv2/videostab/wobble_suppression.hpp  |   6 +-
 modules/videostab/src/global_motion.cpp       |  12 +-
 modules/videostab/src/optical_flow.cpp        |  12 +-
 modules/videostab/src/wobble_suppression.cpp  |   8 +-
 samples/cpp/stitching_detailed.cpp            |   8 +-
 .../gpu-basics-similarity.cpp                 | 138 ++--
 samples/cpp/videostab.cpp                     |   2 +-
 samples/gpu/alpha_comp.cpp                    |   2 +-
 samples/gpu/bgfg_segm.cpp                     |  10 +-
 samples/gpu/brox_optical_flow.cpp             |   4 +-
 samples/gpu/cascadeclassifier.cpp             |   8 +-
 samples/gpu/cascadeclassifier_nvidia_api.cpp  |   4 +-
 samples/gpu/driver_api_multi.cpp              |   6 +-
 samples/gpu/driver_api_stereo_multi.cpp       |  10 +-
 samples/gpu/farneback_optical_flow.cpp        |   2 +-
 samples/gpu/generalized_hough.cpp             |  10 +-
 samples/gpu/hog.cpp                           |  14 +-
 samples/gpu/houghlines.cpp                    |   4 +-
 samples/gpu/morphology.cpp                    |  16 +-
 samples/gpu/multi.cpp                         |   6 +-
 samples/gpu/opengl.cpp                        |   2 +-
 samples/gpu/optical_flow.cpp                  |   2 +-
 samples/gpu/opticalflow_nvidia_api.cpp        |   2 +-
 samples/gpu/performance/performance.cpp       |   2 +-
 samples/gpu/performance/tests.cpp             | 286 ++++----
 samples/gpu/pyrlk_optical_flow.cpp            |   4 +-
 samples/gpu/softcascade.cpp                   |   8 +-
 samples/gpu/stereo_match.cpp                  |  18 +-
 samples/gpu/stereo_multi.cpp                  |  10 +-
 samples/gpu/surf_keypoint_matcher.cpp         |   4 +-
 samples/gpu/video_reader.cpp                  |   4 +-
 343 files changed, 3883 insertions(+), 3883 deletions(-)

diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp
index 013f573a64..8073f0eba1 100644
--- a/modules/core/include/opencv2/core/base.hpp
+++ b/modules/core/include/opencv2/core/base.hpp
@@ -490,7 +490,7 @@ namespace ogl
     class CV_EXPORTS Arrays;
 }
 
-namespace gpu
+namespace cuda
 {
     class CV_EXPORTS GpuMat;
     class CV_EXPORTS CudaMem;
diff --git a/modules/core/include/opencv2/core/cuda/block.hpp b/modules/core/include/opencv2/core/cuda/block.hpp
index 04bfdba714..d6c2217097 100644
--- a/modules/core/include/opencv2/core/cuda/block.hpp
+++ b/modules/core/include/opencv2/core/cuda/block.hpp
@@ -43,7 +43,7 @@
 #ifndef __OPENCV_GPU_DEVICE_BLOCK_HPP__
 #define __OPENCV_GPU_DEVICE_BLOCK_HPP__
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     struct Block
     {
diff --git a/modules/core/include/opencv2/core/cuda/border_interpolate.hpp b/modules/core/include/opencv2/core/cuda/border_interpolate.hpp
index 6c53f09eee..0850b36afb 100644
--- a/modules/core/include/opencv2/core/cuda/border_interpolate.hpp
+++ b/modules/core/include/opencv2/core/cuda/border_interpolate.hpp
@@ -47,7 +47,7 @@
 #include "vec_traits.hpp"
 #include "vec_math.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     //////////////////////////////////////////////////////////////
     // BrdConstant
@@ -709,6 +709,6 @@ namespace cv { namespace gpu { namespace cudev
         int width;
         D val;
     };
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/color.hpp b/modules/core/include/opencv2/core/cuda/color.hpp
index d5f94c92a4..b978d0b4ab 100644
--- a/modules/core/include/opencv2/core/cuda/color.hpp
+++ b/modules/core/include/opencv2/core/cuda/color.hpp
@@ -45,7 +45,7 @@
 
 #include "detail/color_detail.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     // All OPENCV_GPU_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements
     // template <typename T> class ColorSpace1_to_ColorSpace2_traits
@@ -296,6 +296,6 @@ namespace cv { namespace gpu { namespace cudev
     OPENCV_GPU_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0)
 
     #undef OPENCV_GPU_IMPLEMENT_Luv2RGB_TRAITS
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/common.hpp b/modules/core/include/opencv2/core/cuda/common.hpp
index 6b3fef19a4..ae18750f5e 100644
--- a/modules/core/include/opencv2/core/cuda/common.hpp
+++ b/modules/core/include/opencv2/core/cuda/common.hpp
@@ -56,7 +56,7 @@
     #endif
 #endif
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
     static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
     {
         if (cudaSuccess != err)
@@ -66,13 +66,13 @@ namespace cv { namespace gpu {
 
 #ifndef cudaSafeCall
     #if defined(__GNUC__)
-        #define cudaSafeCall(expr)  cv::gpu::checkCudaError(expr, __FILE__, __LINE__, __func__)
+        #define cudaSafeCall(expr)  cv::cuda::checkCudaError(expr, __FILE__, __LINE__, __func__)
     #else /* defined(__CUDACC__) || defined(__MSVC__) */
-        #define cudaSafeCall(expr)  cv::gpu::checkCudaError(expr, __FILE__, __LINE__, "")
+        #define cudaSafeCall(expr)  cv::cuda::checkCudaError(expr, __FILE__, __LINE__, "")
     #endif
 #endif
 
-namespace cv { namespace gpu
+namespace cv { namespace cuda
 {
     template <typename T> static inline bool isAligned(const T* ptr, size_t size)
     {
@@ -85,7 +85,7 @@ namespace cv { namespace gpu
     }
 }}
 
-namespace cv { namespace gpu
+namespace cv { namespace cuda
 {
     namespace cudev
     {
diff --git a/modules/core/include/opencv2/core/cuda/datamov_utils.hpp b/modules/core/include/opencv2/core/cuda/datamov_utils.hpp
index 10df540936..c7f2e7c0f6 100644
--- a/modules/core/include/opencv2/core/cuda/datamov_utils.hpp
+++ b/modules/core/include/opencv2/core/cuda/datamov_utils.hpp
@@ -45,7 +45,7 @@
 
 #include "common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
 
@@ -100,6 +100,6 @@ namespace cv { namespace gpu { namespace cudev
         #undef OPENCV_GPU_ASM_PTR
 
     #endif // __CUDA_ARCH__ >= 200
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/detail/color_detail.hpp b/modules/core/include/opencv2/core/cuda/detail/color_detail.hpp
index a0a673410a..b66ea1bf60 100644
--- a/modules/core/include/opencv2/core/cuda/detail/color_detail.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/color_detail.hpp
@@ -49,7 +49,7 @@
 #include "../limits.hpp"
 #include "../functional.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     #ifndef CV_DESCALE
         #define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
@@ -146,7 +146,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(name, scn, dcn, bidx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2RGB<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2RGB<T, scn, dcn, bidx> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -219,7 +219,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(name, scn, bidx, green_bits) \
     struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2RGB5x5<scn, bidx, green_bits> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2RGB5x5<scn, bidx, green_bits> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -300,7 +300,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(name, dcn, bidx, green_bits) \
     struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB5x52RGB<dcn, bidx, green_bits> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB5x52RGB<dcn, bidx, green_bits> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -346,7 +346,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(name, dcn) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::Gray2RGB<T, dcn> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::Gray2RGB<T, dcn> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -388,7 +388,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(name, green_bits) \
     struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::Gray2RGB5x5<green_bits> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::Gray2RGB5x5<green_bits> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -430,7 +430,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(name, green_bits) \
     struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB5x52Gray<green_bits> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB5x52Gray<green_bits> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -481,7 +481,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(name, scn, bidx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2Gray<T, scn, bidx> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2Gray<T, scn, bidx> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -532,7 +532,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(name, scn, dcn, bidx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2YUV<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2YUV<T, scn, dcn, bidx> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -620,7 +620,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(name, scn, dcn, bidx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::YUV2RGB<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::YUV2RGB<T, scn, dcn, bidx> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -699,7 +699,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(name, scn, dcn, bidx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2YCrCb<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2YCrCb<T, scn, dcn, bidx> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -778,7 +778,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(name, scn, dcn, bidx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::YCrCb2RGB<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::YCrCb2RGB<T, scn, dcn, bidx> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -854,7 +854,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(name, scn, dcn, bidx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2XYZ<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2XYZ<T, scn, dcn, bidx> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -929,7 +929,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(name, scn, dcn, bidx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::XYZ2RGB<T, scn, dcn, bidx> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::XYZ2RGB<T, scn, dcn, bidx> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1067,7 +1067,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(name, scn, dcn, bidx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2HSV<T, scn, dcn, bidx, 180> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2HSV<T, scn, dcn, bidx, 180> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1075,7 +1075,7 @@ namespace cv { namespace gpu { namespace cudev
     }; \
     template <typename T> struct name ## _full_traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2HSV<T, scn, dcn, bidx, 256> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2HSV<T, scn, dcn, bidx, 256> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1083,7 +1083,7 @@ namespace cv { namespace gpu { namespace cudev
     }; \
     template <> struct name ## _traits<float> \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1091,7 +1091,7 @@ namespace cv { namespace gpu { namespace cudev
     }; \
     template <> struct name ## _full_traits<float> \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2HSV<float, scn, dcn, bidx, 360> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1207,7 +1207,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(name, scn, dcn, bidx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::HSV2RGB<T, scn, dcn, bidx, 180> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::HSV2RGB<T, scn, dcn, bidx, 180> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1215,7 +1215,7 @@ namespace cv { namespace gpu { namespace cudev
     }; \
     template <typename T> struct name ## _full_traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::HSV2RGB<T, scn, dcn, bidx, 255> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::HSV2RGB<T, scn, dcn, bidx, 255> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1223,7 +1223,7 @@ namespace cv { namespace gpu { namespace cudev
     }; \
     template <> struct name ## _traits<float> \
     { \
-        typedef ::cv::gpu::cudev::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1231,7 +1231,7 @@ namespace cv { namespace gpu { namespace cudev
     }; \
     template <> struct name ## _full_traits<float> \
     { \
-        typedef ::cv::gpu::cudev::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::HSV2RGB<float, scn, dcn, bidx, 360> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1340,7 +1340,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(name, scn, dcn, bidx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2HLS<T, scn, dcn, bidx, 180> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2HLS<T, scn, dcn, bidx, 180> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1348,7 +1348,7 @@ namespace cv { namespace gpu { namespace cudev
     }; \
     template <typename T> struct name ## _full_traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2HLS<T, scn, dcn, bidx, 256> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2HLS<T, scn, dcn, bidx, 256> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1356,7 +1356,7 @@ namespace cv { namespace gpu { namespace cudev
     }; \
     template <> struct name ## _traits<float> \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1364,7 +1364,7 @@ namespace cv { namespace gpu { namespace cudev
     }; \
     template <> struct name ## _full_traits<float> \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2HLS<float, scn, dcn, bidx, 360> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1480,7 +1480,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(name, scn, dcn, bidx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::HLS2RGB<T, scn, dcn, bidx, 180> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::HLS2RGB<T, scn, dcn, bidx, 180> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1488,7 +1488,7 @@ namespace cv { namespace gpu { namespace cudev
     }; \
     template <typename T> struct name ## _full_traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::HLS2RGB<T, scn, dcn, bidx, 255> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::HLS2RGB<T, scn, dcn, bidx, 255> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1496,7 +1496,7 @@ namespace cv { namespace gpu { namespace cudev
     }; \
     template <> struct name ## _traits<float> \
     { \
-        typedef ::cv::gpu::cudev::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1504,7 +1504,7 @@ namespace cv { namespace gpu { namespace cudev
     }; \
     template <> struct name ## _full_traits<float> \
     { \
-        typedef ::cv::gpu::cudev::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::HLS2RGB<float, scn, dcn, bidx, 360> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1649,7 +1649,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2Lab_TRAITS(name, scn, dcn, srgb, blueIdx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2Lab<T, scn, dcn, srgb, blueIdx> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2Lab<T, scn, dcn, srgb, blueIdx> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1762,7 +1762,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_Lab2RGB_TRAITS(name, scn, dcn, srgb, blueIdx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::Lab2RGB<T, scn, dcn, srgb, blueIdx> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::Lab2RGB<T, scn, dcn, srgb, blueIdx> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1861,7 +1861,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_RGB2Luv_TRAITS(name, scn, dcn, srgb, blueIdx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::RGB2Luv<T, scn, dcn, srgb, blueIdx> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::RGB2Luv<T, scn, dcn, srgb, blueIdx> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1962,7 +1962,7 @@ namespace cv { namespace gpu { namespace cudev
 #define OPENCV_GPU_IMPLEMENT_Luv2RGB_TRAITS(name, scn, dcn, srgb, blueIdx) \
     template <typename T> struct name ## _traits \
     { \
-        typedef ::cv::gpu::cudev::color_detail::Luv2RGB<T, scn, dcn, srgb, blueIdx> functor_type; \
+        typedef ::cv::cuda::cudev::color_detail::Luv2RGB<T, scn, dcn, srgb, blueIdx> functor_type; \
         static __host__ __device__ __forceinline__ functor_type create_functor() \
         { \
             return functor_type(); \
@@ -1971,6 +1971,6 @@ namespace cv { namespace gpu { namespace cudev
 
     #undef CV_DESCALE
 
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_COLOR_DETAIL_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/detail/reduce.hpp b/modules/core/include/opencv2/core/cuda/detail/reduce.hpp
index eba9b41a78..e4bd3c9bf5 100644
--- a/modules/core/include/opencv2/core/cuda/detail/reduce.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/reduce.hpp
@@ -47,7 +47,7 @@
 #include "../warp.hpp"
 #include "../warp_shuffle.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace reduce_detail
     {
diff --git a/modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp b/modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp
index 1049e6714c..5fd8ad4c05 100644
--- a/modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/reduce_key_val.hpp
@@ -47,7 +47,7 @@
 #include "../warp.hpp"
 #include "../warp_shuffle.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace reduce_key_val_detail
     {
diff --git a/modules/core/include/opencv2/core/cuda/detail/transform_detail.hpp b/modules/core/include/opencv2/core/cuda/detail/transform_detail.hpp
index 2ac309b0c6..ede7a78f8d 100644
--- a/modules/core/include/opencv2/core/cuda/detail/transform_detail.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/transform_detail.hpp
@@ -47,7 +47,7 @@
 #include "../vec_traits.hpp"
 #include "../functional.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace transform_detail
     {
@@ -390,6 +390,6 @@ namespace cv { namespace gpu { namespace cudev
             }
         };
     } // namespace transform_detail
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/detail/type_traits_detail.hpp b/modules/core/include/opencv2/core/cuda/detail/type_traits_detail.hpp
index 4292d88003..3464eb1a88 100644
--- a/modules/core/include/opencv2/core/cuda/detail/type_traits_detail.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/type_traits_detail.hpp
@@ -46,7 +46,7 @@
 #include "../common.hpp"
 #include "../vec_traits.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace type_traits_detail
     {
@@ -182,6 +182,6 @@ namespace cv { namespace gpu { namespace cudev
             enum { value = 1 };
         };
     } // namespace type_traits_detail
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/detail/vec_distance_detail.hpp b/modules/core/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
index a2a31a703c..12cdc35c83 100644
--- a/modules/core/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
+++ b/modules/core/include/opencv2/core/cuda/detail/vec_distance_detail.hpp
@@ -45,7 +45,7 @@
 
 #include "../datamov_utils.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace vec_distance_detail
     {
@@ -112,6 +112,6 @@ namespace cv { namespace gpu { namespace cudev
             }
         };
     } // namespace vec_distance_detail
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/dynamic_smem.hpp b/modules/core/include/opencv2/core/cuda/dynamic_smem.hpp
index aa20e53b82..1d1e73c42b 100644
--- a/modules/core/include/opencv2/core/cuda/dynamic_smem.hpp
+++ b/modules/core/include/opencv2/core/cuda/dynamic_smem.hpp
@@ -43,7 +43,7 @@
 #ifndef __OPENCV_GPU_DYNAMIC_SMEM_HPP__
 #define __OPENCV_GPU_DYNAMIC_SMEM_HPP__
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template<class T> struct DynamicSharedMem
     {
diff --git a/modules/core/include/opencv2/core/cuda/emulation.hpp b/modules/core/include/opencv2/core/cuda/emulation.hpp
index b484f2378e..0f18c2bdf3 100644
--- a/modules/core/include/opencv2/core/cuda/emulation.hpp
+++ b/modules/core/include/opencv2/core/cuda/emulation.hpp
@@ -46,7 +46,7 @@
 #include "common.hpp"
 #include "warp_reduce.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     struct Emulation
     {
@@ -256,6 +256,6 @@ namespace cv { namespace gpu { namespace cudev
             }
         };
     }; //struct Emulation
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif /* OPENCV_GPU_EMULATION_HPP_ */
diff --git a/modules/core/include/opencv2/core/cuda/filters.hpp b/modules/core/include/opencv2/core/cuda/filters.hpp
index f35f662e8b..1857a02391 100644
--- a/modules/core/include/opencv2/core/cuda/filters.hpp
+++ b/modules/core/include/opencv2/core/cuda/filters.hpp
@@ -48,7 +48,7 @@
 #include "vec_math.hpp"
 #include "type_traits.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename Ptr2D> struct PointFilter
     {
@@ -273,6 +273,6 @@ namespace cv { namespace gpu { namespace cudev
         float scale_x, scale_y;
         int width, haight;
     };
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_FILTERS_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/funcattrib.hpp b/modules/core/include/opencv2/core/cuda/funcattrib.hpp
index 46ef81926c..c08a9fd42d 100644
--- a/modules/core/include/opencv2/core/cuda/funcattrib.hpp
+++ b/modules/core/include/opencv2/core/cuda/funcattrib.hpp
@@ -45,7 +45,7 @@
 
 #include <cstdio>
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template<class Func>
     void printFuncAttrib(Func& func)
@@ -66,6 +66,6 @@ namespace cv { namespace gpu { namespace cudev
         printf("\n");
         fflush(stdout);
     }
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif  /* __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_ */
diff --git a/modules/core/include/opencv2/core/cuda/functional.hpp b/modules/core/include/opencv2/core/cuda/functional.hpp
index d6c019236a..c46490f094 100644
--- a/modules/core/include/opencv2/core/cuda/functional.hpp
+++ b/modules/core/include/opencv2/core/cuda/functional.hpp
@@ -49,7 +49,7 @@
 #include "type_traits.hpp"
 #include "device_functions.h"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     // Function Objects
     template<typename Argument, typename Result> struct unary_function : public std::unary_function<Argument, Result> {};
@@ -784,6 +784,6 @@ namespace cv { namespace gpu { namespace cudev
 
 #define OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(type) \
     template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_FUNCTIONAL_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/limits.hpp b/modules/core/include/opencv2/core/cuda/limits.hpp
index 0439de795c..f853f7476a 100644
--- a/modules/core/include/opencv2/core/cuda/limits.hpp
+++ b/modules/core/include/opencv2/core/cuda/limits.hpp
@@ -47,7 +47,7 @@
 #include <float.h>
 #include "common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
 
 template <class T> struct numeric_limits;
@@ -117,6 +117,6 @@ template <> struct numeric_limits<double>
     static const bool is_signed = true;
 };
 
-}}} // namespace cv { namespace gpu { namespace cudev {
+}}} // namespace cv { namespace cuda { namespace cudev {
 
 #endif // __OPENCV_GPU_LIMITS_GPU_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/reduce.hpp b/modules/core/include/opencv2/core/cuda/reduce.hpp
index 722e2bbeb6..d663dc189e 100644
--- a/modules/core/include/opencv2/core/cuda/reduce.hpp
+++ b/modules/core/include/opencv2/core/cuda/reduce.hpp
@@ -47,7 +47,7 @@
 #include "detail/reduce.hpp"
 #include "detail/reduce_key_val.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <int N, typename T, class Op>
     __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
diff --git a/modules/core/include/opencv2/core/cuda/saturate_cast.hpp b/modules/core/include/opencv2/core/cuda/saturate_cast.hpp
index b30f5e7ce0..5b7cd10948 100644
--- a/modules/core/include/opencv2/core/cuda/saturate_cast.hpp
+++ b/modules/core/include/opencv2/core/cuda/saturate_cast.hpp
@@ -45,7 +45,7 @@
 
 #include "common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
     template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
diff --git a/modules/core/include/opencv2/core/cuda/scan.hpp b/modules/core/include/opencv2/core/cuda/scan.hpp
index ecde123bb3..2202974c62 100644
--- a/modules/core/include/opencv2/core/cuda/scan.hpp
+++ b/modules/core/include/opencv2/core/cuda/scan.hpp
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/warp.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     enum ScanKind { EXCLUSIVE = 0,  INCLUSIVE = 1 };
 
@@ -174,13 +174,13 @@ namespace cv { namespace gpu { namespace cudev
     __device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
     {
     #if __CUDA_ARCH__ >= 300
-        const unsigned int laneId = cv::gpu::cudev::Warp::laneId();
+        const unsigned int laneId = cv::cuda::cudev::Warp::laneId();
 
         // scan on shuffl functions
         #pragma unroll
         for (int i = 1; i <= (OPENCV_GPU_WARP_SIZE / 2); i *= 2)
         {
-            const T n = cv::gpu::cudev::shfl_up(idata, i);
+            const T n = cv::cuda::cudev::shfl_up(idata, i);
             if (laneId >= i)
                   idata += n;
         }
diff --git a/modules/core/include/opencv2/core/cuda/simd_functions.hpp b/modules/core/include/opencv2/core/cuda/simd_functions.hpp
index aedd5632f2..a335704d16 100644
--- a/modules/core/include/opencv2/core/cuda/simd_functions.hpp
+++ b/modules/core/include/opencv2/core/cuda/simd_functions.hpp
@@ -123,7 +123,7 @@
   vmin4(a,b)      per-byte unsigned minimum: min(a, b)
 */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     // 2
 
diff --git a/modules/core/include/opencv2/core/cuda/transform.hpp b/modules/core/include/opencv2/core/cuda/transform.hpp
index 7c82e36469..3d42128966 100644
--- a/modules/core/include/opencv2/core/cuda/transform.hpp
+++ b/modules/core/include/opencv2/core/cuda/transform.hpp
@@ -47,7 +47,7 @@
 #include "utility.hpp"
 #include "detail/transform_detail.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T, typename D, typename UnOp, typename Mask>
     static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
diff --git a/modules/core/include/opencv2/core/cuda/type_traits.hpp b/modules/core/include/opencv2/core/cuda/type_traits.hpp
index 8a58264bfc..4e9dbb7674 100644
--- a/modules/core/include/opencv2/core/cuda/type_traits.hpp
+++ b/modules/core/include/opencv2/core/cuda/type_traits.hpp
@@ -45,7 +45,7 @@
 
 #include "detail/type_traits_detail.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T> struct IsSimpleParameter
     {
diff --git a/modules/core/include/opencv2/core/cuda/utility.hpp b/modules/core/include/opencv2/core/cuda/utility.hpp
index 82c61a2014..e193230438 100644
--- a/modules/core/include/opencv2/core/cuda/utility.hpp
+++ b/modules/core/include/opencv2/core/cuda/utility.hpp
@@ -46,7 +46,7 @@
 #include "saturate_cast.hpp"
 #include "datamov_utils.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     #define OPENCV_GPU_LOG_WARP_SIZE        (5)
     #define OPENCV_GPU_WARP_SIZE            (1 << OPENCV_GPU_LOG_WARP_SIZE)
@@ -208,6 +208,6 @@ namespace cv { namespace gpu { namespace cudev
 
         return false;
     }
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_UTILITY_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/vec_distance.hpp b/modules/core/include/opencv2/core/cuda/vec_distance.hpp
index 4b88410207..787127e149 100644
--- a/modules/core/include/opencv2/core/cuda/vec_distance.hpp
+++ b/modules/core/include/opencv2/core/cuda/vec_distance.hpp
@@ -47,7 +47,7 @@
 #include "functional.hpp"
 #include "detail/vec_distance_detail.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T> struct L1Dist
     {
@@ -219,6 +219,6 @@ namespace cv { namespace gpu { namespace cudev
 
         U vec1Vals[MAX_LEN / THREAD_DIM];
     };
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_VEC_DISTANCE_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/vec_math.hpp b/modules/core/include/opencv2/core/cuda/vec_math.hpp
index 0dc92c3ef6..87cacf04a1 100644
--- a/modules/core/include/opencv2/core/cuda/vec_math.hpp
+++ b/modules/core/include/opencv2/core/cuda/vec_math.hpp
@@ -46,7 +46,7 @@
 #include "vec_traits.hpp"
 #include "saturate_cast.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
 
 // saturate_cast
@@ -917,6 +917,6 @@ CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
 
 #undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
 
-}}} // namespace cv { namespace gpu { namespace device
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_VECMATH_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/vec_traits.hpp b/modules/core/include/opencv2/core/cuda/vec_traits.hpp
index 304b05c919..dd069b7698 100644
--- a/modules/core/include/opencv2/core/cuda/vec_traits.hpp
+++ b/modules/core/include/opencv2/core/cuda/vec_traits.hpp
@@ -45,7 +45,7 @@
 
 #include "common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template<typename T, int N> struct TypeVec;
 
@@ -275,6 +275,6 @@ namespace cv { namespace gpu { namespace cudev
         static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
         static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
     };
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // __OPENCV_GPU_VEC_TRAITS_HPP__
diff --git a/modules/core/include/opencv2/core/cuda/warp.hpp b/modules/core/include/opencv2/core/cuda/warp.hpp
index 6d2b7745f1..4fac490544 100644
--- a/modules/core/include/opencv2/core/cuda/warp.hpp
+++ b/modules/core/include/opencv2/core/cuda/warp.hpp
@@ -43,7 +43,7 @@
 #ifndef __OPENCV_GPU_DEVICE_WARP_HPP__
 #define __OPENCV_GPU_DEVICE_WARP_HPP__
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     struct Warp
     {
@@ -126,6 +126,6 @@ namespace cv { namespace gpu { namespace cudev
                 *t = value;
         }
     };
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif /* __OPENCV_GPU_DEVICE_WARP_HPP__ */
diff --git a/modules/core/include/opencv2/core/cuda/warp_reduce.hpp b/modules/core/include/opencv2/core/cuda/warp_reduce.hpp
index 82185e8c0c..3058c47ee7 100644
--- a/modules/core/include/opencv2/core/cuda/warp_reduce.hpp
+++ b/modules/core/include/opencv2/core/cuda/warp_reduce.hpp
@@ -43,7 +43,7 @@
 #ifndef OPENCV_GPU_WARP_REDUCE_HPP__
 #define OPENCV_GPU_WARP_REDUCE_HPP__
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <class T>
     __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
@@ -63,6 +63,6 @@ namespace cv { namespace gpu { namespace cudev
 
         return ptr[tid - lane];
     }
-}}} // namespace cv { namespace gpu { namespace cudev {
+}}} // namespace cv { namespace cuda { namespace cudev {
 
 #endif /* OPENCV_GPU_WARP_REDUCE_HPP__ */
diff --git a/modules/core/include/opencv2/core/cuda/warp_shuffle.hpp b/modules/core/include/opencv2/core/cuda/warp_shuffle.hpp
index aabcacfa4b..a54639ee6e 100644
--- a/modules/core/include/opencv2/core/cuda/warp_shuffle.hpp
+++ b/modules/core/include/opencv2/core/cuda/warp_shuffle.hpp
@@ -43,7 +43,7 @@
 #ifndef __OPENCV_GPU_WARP_SHUFFLE_HPP__
 #define __OPENCV_GPU_WARP_SHUFFLE_HPP__
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T>
     __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
diff --git a/modules/core/include/opencv2/core/gpu.hpp b/modules/core/include/opencv2/core/gpu.hpp
index 9bacfc1a26..9f7636ec7f 100644
--- a/modules/core/include/opencv2/core/gpu.hpp
+++ b/modules/core/include/opencv2/core/gpu.hpp
@@ -51,7 +51,7 @@
 #include "opencv2/core.hpp"
 #include "opencv2/core/gpu_types.hpp"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 //////////////////////////////// GpuMat ///////////////////////////////
 
@@ -664,12 +664,12 @@ private:
 CV_EXPORTS void printCudaDeviceInfo(int device);
 CV_EXPORTS void printShortCudaDeviceInfo(int device);
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 namespace cv {
 
-template <> CV_EXPORTS void Ptr<cv::gpu::Stream::Impl>::delete_obj();
-template <> CV_EXPORTS void Ptr<cv::gpu::Event::Impl>::delete_obj();
+template <> CV_EXPORTS void Ptr<cv::cuda::Stream::Impl>::delete_obj();
+template <> CV_EXPORTS void Ptr<cv::cuda::Event::Impl>::delete_obj();
 
 }
 
diff --git a/modules/core/include/opencv2/core/gpu.inl.hpp b/modules/core/include/opencv2/core/gpu.inl.hpp
index 13861170cb..599b0c2ae3 100644
--- a/modules/core/include/opencv2/core/gpu.inl.hpp
+++ b/modules/core/include/opencv2/core/gpu.inl.hpp
@@ -46,7 +46,7 @@
 
 #include "opencv2/core/gpu.hpp"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 //////////////////////////////// GpuMat ///////////////////////////////
 
@@ -587,14 +587,14 @@ bool DeviceInfo::supports(FeatureSet feature_set) const
     return version >= feature_set;
 }
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 //////////////////////////////// Mat ////////////////////////////////
 
 namespace cv {
 
 inline
-Mat::Mat(const gpu::GpuMat& m)
+Mat::Mat(const cuda::GpuMat& m)
     : flags(0), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0), datalimit(0), allocator(0), size(&rows)
 {
     m.download(*this);
diff --git a/modules/core/include/opencv2/core/gpu_stream_accessor.hpp b/modules/core/include/opencv2/core/gpu_stream_accessor.hpp
index cf7d3c4316..42d510add2 100644
--- a/modules/core/include/opencv2/core/gpu_stream_accessor.hpp
+++ b/modules/core/include/opencv2/core/gpu_stream_accessor.hpp
@@ -57,7 +57,7 @@
 
 namespace cv
 {
-    namespace gpu
+    namespace cuda
     {
         class Stream;
         class Event;
diff --git a/modules/core/include/opencv2/core/gpu_types.hpp b/modules/core/include/opencv2/core/gpu_types.hpp
index 4997034cca..9f2a4697a3 100644
--- a/modules/core/include/opencv2/core/gpu_types.hpp
+++ b/modules/core/include/opencv2/core/gpu_types.hpp
@@ -55,7 +55,7 @@
 
 namespace cv
 {
-    namespace gpu
+    namespace cuda
     {
         // Simple lightweight structures that encapsulates information about an image on device.
         // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp
index c132be9456..8cb27d5de8 100644
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -93,14 +93,14 @@ public:
     template<typename _Tp> _InputArray(const _Tp* vec, int n);
     template<typename _Tp, int m, int n> _InputArray(const Matx<_Tp, m, n>& matx);
     _InputArray(const double& val);
-    _InputArray(const gpu::GpuMat& d_mat);
+    _InputArray(const cuda::GpuMat& d_mat);
     _InputArray(const ogl::Buffer& buf);
-    _InputArray(const gpu::CudaMem& cuda_mem);
+    _InputArray(const cuda::CudaMem& cuda_mem);
     template<typename _Tp> _InputArray(const cudev::GpuMat_<_Tp>& m);
 
     virtual Mat getMat(int i=-1) const;
     virtual void getMatVector(std::vector<Mat>& mv) const;
-    virtual gpu::GpuMat getGpuMat() const;
+    virtual cuda::GpuMat getGpuMat() const;
     virtual ogl::Buffer getOGlBuffer() const;
 
     virtual int kind() const;
@@ -142,9 +142,9 @@ public:
     _OutputArray();
     _OutputArray(Mat& m);
     _OutputArray(std::vector<Mat>& vec);
-    _OutputArray(gpu::GpuMat& d_mat);
+    _OutputArray(cuda::GpuMat& d_mat);
     _OutputArray(ogl::Buffer& buf);
-    _OutputArray(gpu::CudaMem& cuda_mem);
+    _OutputArray(cuda::CudaMem& cuda_mem);
     template<typename _Tp> _OutputArray(cudev::GpuMat_<_Tp>& m);
     template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
     template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
@@ -155,9 +155,9 @@ public:
 
     _OutputArray(const Mat& m);
     _OutputArray(const std::vector<Mat>& vec);
-    _OutputArray(const gpu::GpuMat& d_mat);
+    _OutputArray(const cuda::GpuMat& d_mat);
     _OutputArray(const ogl::Buffer& buf);
-    _OutputArray(const gpu::CudaMem& cuda_mem);
+    _OutputArray(const cuda::CudaMem& cuda_mem);
     template<typename _Tp> _OutputArray(const cudev::GpuMat_<_Tp>& m);
     template<typename _Tp> _OutputArray(const std::vector<_Tp>& vec);
     template<typename _Tp> _OutputArray(const std::vector<std::vector<_Tp> >& vec);
@@ -170,9 +170,9 @@ public:
     virtual bool fixedType() const;
     virtual bool needed() const;
     virtual Mat& getMatRef(int i=-1) const;
-    virtual gpu::GpuMat& getGpuMatRef() const;
+    virtual cuda::GpuMat& getGpuMatRef() const;
     virtual ogl::Buffer& getOGlBufferRef() const;
-    virtual gpu::CudaMem& getCudaMemRef() const;
+    virtual cuda::CudaMem& getCudaMemRef() const;
     virtual void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
     virtual void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
     virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
@@ -506,7 +506,7 @@ public:
     //Mat(const void* img, bool copyData=false);
 
     //! download data from GpuMat
-    explicit Mat(const gpu::GpuMat& m);
+    explicit Mat(const cuda::GpuMat& m);
 
     //! destructor - calls release()
     ~Mat();
diff --git a/modules/core/include/opencv2/core/opengl.hpp b/modules/core/include/opencv2/core/opengl.hpp
index a4ee091f7f..751b2dfb23 100644
--- a/modules/core/include/opencv2/core/opengl.hpp
+++ b/modules/core/include/opencv2/core/opengl.hpp
@@ -99,12 +99,12 @@ public:
     //! copy from host/device memory (blocking)
     void copyFrom(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
     //! copy from device memory (non blocking)
-    void copyFrom(InputArray arr, gpu::Stream& stream, Target target = ARRAY_BUFFER, bool autoRelease = false);
+    void copyFrom(InputArray arr, cuda::Stream& stream, Target target = ARRAY_BUFFER, bool autoRelease = false);
 
     //! copy to host/device memory (blocking)
     void copyTo(OutputArray arr) const;
     //! copy to device memory (non blocking)
-    void copyTo(OutputArray arr, gpu::Stream& stream) const;
+    void copyTo(OutputArray arr, cuda::Stream& stream) const;
 
     //! create copy of current buffer
     Buffer clone(Target target = ARRAY_BUFFER, bool autoRelease = false) const;
@@ -120,12 +120,12 @@ public:
     void unmapHost();
 
     //! map to device memory (blocking)
-    gpu::GpuMat mapDevice();
+    cuda::GpuMat mapDevice();
     void unmapDevice();
 
     //! map to device memory (non blocking)
-    gpu::GpuMat mapDevice(gpu::Stream& stream);
-    void unmapDevice(gpu::Stream& stream);
+    cuda::GpuMat mapDevice(cuda::Stream& stream);
+    void unmapDevice(cuda::Stream& stream);
 
     int rows() const;
     int cols() const;
@@ -276,7 +276,7 @@ CV_EXPORTS void render(const Arrays& arr, InputArray indices, int mode = POINTS,
 
 }} // namespace cv::ogl
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 //! set a CUDA device to use OpenGL interoperability
 CV_EXPORTS void setGlDevice(int device = 0);
diff --git a/modules/core/include/opencv2/core/private.gpu.hpp b/modules/core/include/opencv2/core/private.gpu.hpp
index d26cd0017f..70279bb02a 100644
--- a/modules/core/include/opencv2/core/private.gpu.hpp
+++ b/modules/core/include/opencv2/core/private.gpu.hpp
@@ -75,7 +75,7 @@
 #  endif
 #endif
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
     CV_EXPORTS cv::String getNppErrorMessage(int code);
     CV_EXPORTS cv::String getCudaDriverApiErrorMessage(int code);
 }}
@@ -88,7 +88,7 @@ static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The l
 
 static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); }
 
-namespace cv { namespace gpu
+namespace cv { namespace cuda
 {
     static inline void checkNppError(int code, const char* file, const int line, const char* func)
     {
@@ -131,11 +131,11 @@ namespace cv { namespace gpu
 }}
 
 #if defined(__GNUC__)
-    #define nppSafeCall(expr)  cv::gpu::checkNppError(expr, __FILE__, __LINE__, __func__)
-    #define cuSafeCall(expr)  cv::gpu::checkCudaDriverApiError(expr, __FILE__, __LINE__, __func__)
+    #define nppSafeCall(expr)  cv::cuda::checkNppError(expr, __FILE__, __LINE__, __func__)
+    #define cuSafeCall(expr)  cv::cuda::checkCudaDriverApiError(expr, __FILE__, __LINE__, __func__)
 #else /* defined(__CUDACC__) || defined(__MSVC__) */
-    #define nppSafeCall(expr)  cv::gpu::checkNppError(expr, __FILE__, __LINE__, "")
-    #define cuSafeCall(expr)  cv::gpu::checkCudaDriverApiError(expr, __FILE__, __LINE__, "")
+    #define nppSafeCall(expr)  cv::cuda::checkNppError(expr, __FILE__, __LINE__, "")
+    #define cuSafeCall(expr)  cv::cuda::checkCudaDriverApiError(expr, __FILE__, __LINE__, "")
 #endif
 
 #endif // HAVE_CUDA
diff --git a/modules/core/src/cuda/gpu_mat.cu b/modules/core/src/cuda/gpu_mat.cu
index 0db1584212..5944c38979 100644
--- a/modules/core/src/cuda/gpu_mat.cu
+++ b/modules/core/src/cuda/gpu_mat.cu
@@ -52,13 +52,13 @@
 #include "opencv2/cudev.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 /////////////////////////////////////////////////////
 /// create
 
-void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
+void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
 {
     CV_DbgAssert( _rows >= 0 && _cols >= 0 );
 
@@ -108,7 +108,7 @@ void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
 /////////////////////////////////////////////////////
 /// release
 
-void cv::gpu::GpuMat::release()
+void cv::cuda::GpuMat::release()
 {
     if (refcount && CV_XADD(refcount, -1) == 1)
     {
@@ -124,7 +124,7 @@ void cv::gpu::GpuMat::release()
 /////////////////////////////////////////////////////
 /// upload
 
-void cv::gpu::GpuMat::upload(InputArray arr)
+void cv::cuda::GpuMat::upload(InputArray arr)
 {
     Mat mat = arr.getMat();
 
@@ -135,7 +135,7 @@ void cv::gpu::GpuMat::upload(InputArray arr)
     CV_CUDEV_SAFE_CALL( cudaMemcpy2D(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
 }
 
-void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream)
+void cv::cuda::GpuMat::upload(InputArray arr, Stream& _stream)
 {
     Mat mat = arr.getMat();
 
@@ -150,7 +150,7 @@ void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream)
 /////////////////////////////////////////////////////
 /// download
 
-void cv::gpu::GpuMat::download(OutputArray _dst) const
+void cv::cuda::GpuMat::download(OutputArray _dst) const
 {
     CV_DbgAssert( !empty() );
 
@@ -160,7 +160,7 @@ void cv::gpu::GpuMat::download(OutputArray _dst) const
     CV_CUDEV_SAFE_CALL( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
 }
 
-void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const
+void cv::cuda::GpuMat::download(OutputArray _dst, Stream& _stream) const
 {
     CV_DbgAssert( !empty() );
 
@@ -174,7 +174,7 @@ void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const
 /////////////////////////////////////////////////////
 /// copyTo
 
-void cv::gpu::GpuMat::copyTo(OutputArray _dst) const
+void cv::cuda::GpuMat::copyTo(OutputArray _dst) const
 {
     CV_DbgAssert( !empty() );
 
@@ -184,7 +184,7 @@ void cv::gpu::GpuMat::copyTo(OutputArray _dst) const
     CV_CUDEV_SAFE_CALL( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) );
 }
 
-void cv::gpu::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const
+void cv::cuda::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const
 {
     CV_DbgAssert( !empty() );
 
@@ -220,7 +220,7 @@ namespace
     }
 }
 
-void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& stream) const
+void cv::cuda::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& stream) const
 {
     CV_DbgAssert( !empty() );
     CV_DbgAssert( depth() <= CV_64F && channels() <= 4 );
@@ -279,7 +279,7 @@ namespace
     }
 }
 
-GpuMat& cv::gpu::GpuMat::setTo(Scalar value, Stream& stream)
+GpuMat& cv::cuda::GpuMat::setTo(Scalar value, Stream& stream)
 {
     CV_DbgAssert( !empty() );
     CV_DbgAssert( depth() <= CV_64F && channels() <= 4 );
@@ -333,7 +333,7 @@ GpuMat& cv::gpu::GpuMat::setTo(Scalar value, Stream& stream)
     return *this;
 }
 
-GpuMat& cv::gpu::GpuMat::setTo(Scalar value, InputArray _mask, Stream& stream)
+GpuMat& cv::cuda::GpuMat::setTo(Scalar value, InputArray _mask, Stream& stream)
 {
     CV_DbgAssert( !empty() );
     CV_DbgAssert( depth() <= CV_64F && channels() <= 4 );
@@ -412,7 +412,7 @@ namespace
     }
 }
 
-void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& stream) const
+void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& stream) const
 {
     if (rtype < 0)
         rtype = type();
@@ -453,7 +453,7 @@ void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& stream) con
     funcs[sdepth][ddepth](reshape(1), dst.reshape(1), stream);
 }
 
-void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& stream) const
+void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& stream) const
 {
     if (rtype < 0)
         rtype = type();
diff --git a/modules/core/src/gpu_cuda_mem.cpp b/modules/core/src/gpu_cuda_mem.cpp
index 52de069cbc..0eb73606aa 100644
--- a/modules/core/src/gpu_cuda_mem.cpp
+++ b/modules/core/src/gpu_cuda_mem.cpp
@@ -44,7 +44,7 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 namespace
 {
@@ -57,7 +57,7 @@ namespace
     }
 }
 
-void cv::gpu::CudaMem::create(int rows_, int cols_, int type_)
+void cv::cuda::CudaMem::create(int rows_, int cols_, int type_)
 {
 #ifndef HAVE_CUDA
     (void) rows_;
@@ -121,7 +121,7 @@ void cv::gpu::CudaMem::create(int rows_, int cols_, int type_)
 #endif
 }
 
-CudaMem cv::gpu::CudaMem::reshape(int new_cn, int new_rows) const
+CudaMem cv::cuda::CudaMem::reshape(int new_cn, int new_rows) const
 {
     CudaMem hdr = *this;
 
@@ -164,7 +164,7 @@ CudaMem cv::gpu::CudaMem::reshape(int new_cn, int new_rows) const
     return hdr;
 }
 
-void cv::gpu::CudaMem::release()
+void cv::cuda::CudaMem::release()
 {
 #ifdef HAVE_CUDA
     if (refcount && CV_XADD(refcount, -1) == 1)
@@ -179,7 +179,7 @@ void cv::gpu::CudaMem::release()
 #endif
 }
 
-GpuMat cv::gpu::CudaMem::createGpuMatHeader() const
+GpuMat cv::cuda::CudaMem::createGpuMatHeader() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -194,7 +194,7 @@ GpuMat cv::gpu::CudaMem::createGpuMatHeader() const
 #endif
 }
 
-void cv::gpu::registerPageLocked(Mat& m)
+void cv::cuda::registerPageLocked(Mat& m)
 {
 #ifndef HAVE_CUDA
     (void) m;
@@ -205,7 +205,7 @@ void cv::gpu::registerPageLocked(Mat& m)
 #endif
 }
 
-void cv::gpu::unregisterPageLocked(Mat& m)
+void cv::cuda::unregisterPageLocked(Mat& m)
 {
 #ifndef HAVE_CUDA
     (void) m;
diff --git a/modules/core/src/gpu_info.cpp b/modules/core/src/gpu_info.cpp
index 5a1e567463..5ad33ce8a1 100644
--- a/modules/core/src/gpu_info.cpp
+++ b/modules/core/src/gpu_info.cpp
@@ -43,9 +43,9 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
-int cv::gpu::getCudaEnabledDeviceCount()
+int cv::cuda::getCudaEnabledDeviceCount()
 {
 #ifndef HAVE_CUDA
     return 0;
@@ -64,7 +64,7 @@ int cv::gpu::getCudaEnabledDeviceCount()
 #endif
 }
 
-void cv::gpu::setDevice(int device)
+void cv::cuda::setDevice(int device)
 {
 #ifndef HAVE_CUDA
     (void) device;
@@ -74,7 +74,7 @@ void cv::gpu::setDevice(int device)
 #endif
 }
 
-int cv::gpu::getDevice()
+int cv::cuda::getDevice()
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -86,7 +86,7 @@ int cv::gpu::getDevice()
 #endif
 }
 
-void cv::gpu::resetDevice()
+void cv::cuda::resetDevice()
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -95,7 +95,7 @@ void cv::gpu::resetDevice()
 #endif
 }
 
-bool cv::gpu::deviceSupports(FeatureSet feature_set)
+bool cv::cuda::deviceSupports(FeatureSet feature_set)
 {
 #ifndef HAVE_CUDA
     (void) feature_set;
@@ -225,7 +225,7 @@ namespace
 
 #endif
 
-bool cv::gpu::TargetArchs::builtWith(cv::gpu::FeatureSet feature_set)
+bool cv::cuda::TargetArchs::builtWith(cv::cuda::FeatureSet feature_set)
 {
 #ifndef HAVE_CUDA
     (void) feature_set;
@@ -236,7 +236,7 @@ bool cv::gpu::TargetArchs::builtWith(cv::gpu::FeatureSet feature_set)
 #endif
 }
 
-bool cv::gpu::TargetArchs::hasPtx(int major, int minor)
+bool cv::cuda::TargetArchs::hasPtx(int major, int minor)
 {
 #ifndef HAVE_CUDA
     (void) major;
@@ -248,7 +248,7 @@ bool cv::gpu::TargetArchs::hasPtx(int major, int minor)
 #endif
 }
 
-bool cv::gpu::TargetArchs::hasBin(int major, int minor)
+bool cv::cuda::TargetArchs::hasBin(int major, int minor)
 {
 #ifndef HAVE_CUDA
     (void) major;
@@ -260,7 +260,7 @@ bool cv::gpu::TargetArchs::hasBin(int major, int minor)
 #endif
 }
 
-bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor)
+bool cv::cuda::TargetArchs::hasEqualOrLessPtx(int major, int minor)
 {
 #ifndef HAVE_CUDA
     (void) major;
@@ -272,7 +272,7 @@ bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor)
 #endif
 }
 
-bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
+bool cv::cuda::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
 {
 #ifndef HAVE_CUDA
     (void) major;
@@ -284,7 +284,7 @@ bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
 #endif
 }
 
-bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
+bool cv::cuda::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
 {
 #ifndef HAVE_CUDA
     (void) major;
@@ -345,7 +345,7 @@ namespace
 
 #endif
 
-const char* cv::gpu::DeviceInfo::name() const
+const char* cv::cuda::DeviceInfo::name() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -355,7 +355,7 @@ const char* cv::gpu::DeviceInfo::name() const
 #endif
 }
 
-size_t cv::gpu::DeviceInfo::totalGlobalMem() const
+size_t cv::cuda::DeviceInfo::totalGlobalMem() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -365,7 +365,7 @@ size_t cv::gpu::DeviceInfo::totalGlobalMem() const
 #endif
 }
 
-size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const
+size_t cv::cuda::DeviceInfo::sharedMemPerBlock() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -375,7 +375,7 @@ size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::regsPerBlock() const
+int cv::cuda::DeviceInfo::regsPerBlock() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -385,7 +385,7 @@ int cv::gpu::DeviceInfo::regsPerBlock() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::warpSize() const
+int cv::cuda::DeviceInfo::warpSize() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -395,7 +395,7 @@ int cv::gpu::DeviceInfo::warpSize() const
 #endif
 }
 
-size_t cv::gpu::DeviceInfo::memPitch() const
+size_t cv::cuda::DeviceInfo::memPitch() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -405,7 +405,7 @@ size_t cv::gpu::DeviceInfo::memPitch() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::maxThreadsPerBlock() const
+int cv::cuda::DeviceInfo::maxThreadsPerBlock() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -415,7 +415,7 @@ int cv::gpu::DeviceInfo::maxThreadsPerBlock() const
 #endif
 }
 
-Vec3i cv::gpu::DeviceInfo::maxThreadsDim() const
+Vec3i cv::cuda::DeviceInfo::maxThreadsDim() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -425,7 +425,7 @@ Vec3i cv::gpu::DeviceInfo::maxThreadsDim() const
 #endif
 }
 
-Vec3i cv::gpu::DeviceInfo::maxGridSize() const
+Vec3i cv::cuda::DeviceInfo::maxGridSize() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -435,7 +435,7 @@ Vec3i cv::gpu::DeviceInfo::maxGridSize() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::clockRate() const
+int cv::cuda::DeviceInfo::clockRate() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -445,7 +445,7 @@ int cv::gpu::DeviceInfo::clockRate() const
 #endif
 }
 
-size_t cv::gpu::DeviceInfo::totalConstMem() const
+size_t cv::cuda::DeviceInfo::totalConstMem() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -455,7 +455,7 @@ size_t cv::gpu::DeviceInfo::totalConstMem() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::majorVersion() const
+int cv::cuda::DeviceInfo::majorVersion() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -465,7 +465,7 @@ int cv::gpu::DeviceInfo::majorVersion() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::minorVersion() const
+int cv::cuda::DeviceInfo::minorVersion() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -475,7 +475,7 @@ int cv::gpu::DeviceInfo::minorVersion() const
 #endif
 }
 
-size_t cv::gpu::DeviceInfo::textureAlignment() const
+size_t cv::cuda::DeviceInfo::textureAlignment() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -485,7 +485,7 @@ size_t cv::gpu::DeviceInfo::textureAlignment() const
 #endif
 }
 
-size_t cv::gpu::DeviceInfo::texturePitchAlignment() const
+size_t cv::cuda::DeviceInfo::texturePitchAlignment() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -495,7 +495,7 @@ size_t cv::gpu::DeviceInfo::texturePitchAlignment() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::multiProcessorCount() const
+int cv::cuda::DeviceInfo::multiProcessorCount() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -505,7 +505,7 @@ int cv::gpu::DeviceInfo::multiProcessorCount() const
 #endif
 }
 
-bool cv::gpu::DeviceInfo::kernelExecTimeoutEnabled() const
+bool cv::cuda::DeviceInfo::kernelExecTimeoutEnabled() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -515,7 +515,7 @@ bool cv::gpu::DeviceInfo::kernelExecTimeoutEnabled() const
 #endif
 }
 
-bool cv::gpu::DeviceInfo::integrated() const
+bool cv::cuda::DeviceInfo::integrated() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -525,7 +525,7 @@ bool cv::gpu::DeviceInfo::integrated() const
 #endif
 }
 
-bool cv::gpu::DeviceInfo::canMapHostMemory() const
+bool cv::cuda::DeviceInfo::canMapHostMemory() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -535,7 +535,7 @@ bool cv::gpu::DeviceInfo::canMapHostMemory() const
 #endif
 }
 
-DeviceInfo::ComputeMode cv::gpu::DeviceInfo::computeMode() const
+DeviceInfo::ComputeMode cv::cuda::DeviceInfo::computeMode() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -553,7 +553,7 @@ DeviceInfo::ComputeMode cv::gpu::DeviceInfo::computeMode() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::maxTexture1D() const
+int cv::cuda::DeviceInfo::maxTexture1D() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -563,7 +563,7 @@ int cv::gpu::DeviceInfo::maxTexture1D() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::maxTexture1DMipmap() const
+int cv::cuda::DeviceInfo::maxTexture1DMipmap() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -578,7 +578,7 @@ int cv::gpu::DeviceInfo::maxTexture1DMipmap() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::maxTexture1DLinear() const
+int cv::cuda::DeviceInfo::maxTexture1DLinear() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -588,7 +588,7 @@ int cv::gpu::DeviceInfo::maxTexture1DLinear() const
 #endif
 }
 
-Vec2i cv::gpu::DeviceInfo::maxTexture2D() const
+Vec2i cv::cuda::DeviceInfo::maxTexture2D() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -598,7 +598,7 @@ Vec2i cv::gpu::DeviceInfo::maxTexture2D() const
 #endif
 }
 
-Vec2i cv::gpu::DeviceInfo::maxTexture2DMipmap() const
+Vec2i cv::cuda::DeviceInfo::maxTexture2DMipmap() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -613,7 +613,7 @@ Vec2i cv::gpu::DeviceInfo::maxTexture2DMipmap() const
 #endif
 }
 
-Vec3i cv::gpu::DeviceInfo::maxTexture2DLinear() const
+Vec3i cv::cuda::DeviceInfo::maxTexture2DLinear() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -623,7 +623,7 @@ Vec3i cv::gpu::DeviceInfo::maxTexture2DLinear() const
 #endif
 }
 
-Vec2i cv::gpu::DeviceInfo::maxTexture2DGather() const
+Vec2i cv::cuda::DeviceInfo::maxTexture2DGather() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -633,7 +633,7 @@ Vec2i cv::gpu::DeviceInfo::maxTexture2DGather() const
 #endif
 }
 
-Vec3i cv::gpu::DeviceInfo::maxTexture3D() const
+Vec3i cv::cuda::DeviceInfo::maxTexture3D() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -643,7 +643,7 @@ Vec3i cv::gpu::DeviceInfo::maxTexture3D() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::maxTextureCubemap() const
+int cv::cuda::DeviceInfo::maxTextureCubemap() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -653,7 +653,7 @@ int cv::gpu::DeviceInfo::maxTextureCubemap() const
 #endif
 }
 
-Vec2i cv::gpu::DeviceInfo::maxTexture1DLayered() const
+Vec2i cv::cuda::DeviceInfo::maxTexture1DLayered() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -663,7 +663,7 @@ Vec2i cv::gpu::DeviceInfo::maxTexture1DLayered() const
 #endif
 }
 
-Vec3i cv::gpu::DeviceInfo::maxTexture2DLayered() const
+Vec3i cv::cuda::DeviceInfo::maxTexture2DLayered() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -673,7 +673,7 @@ Vec3i cv::gpu::DeviceInfo::maxTexture2DLayered() const
 #endif
 }
 
-Vec2i cv::gpu::DeviceInfo::maxTextureCubemapLayered() const
+Vec2i cv::cuda::DeviceInfo::maxTextureCubemapLayered() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -683,7 +683,7 @@ Vec2i cv::gpu::DeviceInfo::maxTextureCubemapLayered() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::maxSurface1D() const
+int cv::cuda::DeviceInfo::maxSurface1D() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -693,7 +693,7 @@ int cv::gpu::DeviceInfo::maxSurface1D() const
 #endif
 }
 
-Vec2i cv::gpu::DeviceInfo::maxSurface2D() const
+Vec2i cv::cuda::DeviceInfo::maxSurface2D() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -703,7 +703,7 @@ Vec2i cv::gpu::DeviceInfo::maxSurface2D() const
 #endif
 }
 
-Vec3i cv::gpu::DeviceInfo::maxSurface3D() const
+Vec3i cv::cuda::DeviceInfo::maxSurface3D() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -713,7 +713,7 @@ Vec3i cv::gpu::DeviceInfo::maxSurface3D() const
 #endif
 }
 
-Vec2i cv::gpu::DeviceInfo::maxSurface1DLayered() const
+Vec2i cv::cuda::DeviceInfo::maxSurface1DLayered() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -723,7 +723,7 @@ Vec2i cv::gpu::DeviceInfo::maxSurface1DLayered() const
 #endif
 }
 
-Vec3i cv::gpu::DeviceInfo::maxSurface2DLayered() const
+Vec3i cv::cuda::DeviceInfo::maxSurface2DLayered() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -733,7 +733,7 @@ Vec3i cv::gpu::DeviceInfo::maxSurface2DLayered() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::maxSurfaceCubemap() const
+int cv::cuda::DeviceInfo::maxSurfaceCubemap() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -743,7 +743,7 @@ int cv::gpu::DeviceInfo::maxSurfaceCubemap() const
 #endif
 }
 
-Vec2i cv::gpu::DeviceInfo::maxSurfaceCubemapLayered() const
+Vec2i cv::cuda::DeviceInfo::maxSurfaceCubemapLayered() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -753,7 +753,7 @@ Vec2i cv::gpu::DeviceInfo::maxSurfaceCubemapLayered() const
 #endif
 }
 
-size_t cv::gpu::DeviceInfo::surfaceAlignment() const
+size_t cv::cuda::DeviceInfo::surfaceAlignment() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -763,7 +763,7 @@ size_t cv::gpu::DeviceInfo::surfaceAlignment() const
 #endif
 }
 
-bool cv::gpu::DeviceInfo::concurrentKernels() const
+bool cv::cuda::DeviceInfo::concurrentKernels() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -773,7 +773,7 @@ bool cv::gpu::DeviceInfo::concurrentKernels() const
 #endif
 }
 
-bool cv::gpu::DeviceInfo::ECCEnabled() const
+bool cv::cuda::DeviceInfo::ECCEnabled() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -783,7 +783,7 @@ bool cv::gpu::DeviceInfo::ECCEnabled() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::pciBusID() const
+int cv::cuda::DeviceInfo::pciBusID() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -793,7 +793,7 @@ int cv::gpu::DeviceInfo::pciBusID() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::pciDeviceID() const
+int cv::cuda::DeviceInfo::pciDeviceID() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -803,7 +803,7 @@ int cv::gpu::DeviceInfo::pciDeviceID() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::pciDomainID() const
+int cv::cuda::DeviceInfo::pciDomainID() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -813,7 +813,7 @@ int cv::gpu::DeviceInfo::pciDomainID() const
 #endif
 }
 
-bool cv::gpu::DeviceInfo::tccDriver() const
+bool cv::cuda::DeviceInfo::tccDriver() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -823,7 +823,7 @@ bool cv::gpu::DeviceInfo::tccDriver() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::asyncEngineCount() const
+int cv::cuda::DeviceInfo::asyncEngineCount() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -833,7 +833,7 @@ int cv::gpu::DeviceInfo::asyncEngineCount() const
 #endif
 }
 
-bool cv::gpu::DeviceInfo::unifiedAddressing() const
+bool cv::cuda::DeviceInfo::unifiedAddressing() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -843,7 +843,7 @@ bool cv::gpu::DeviceInfo::unifiedAddressing() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::memoryClockRate() const
+int cv::cuda::DeviceInfo::memoryClockRate() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -853,7 +853,7 @@ int cv::gpu::DeviceInfo::memoryClockRate() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::memoryBusWidth() const
+int cv::cuda::DeviceInfo::memoryBusWidth() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -863,7 +863,7 @@ int cv::gpu::DeviceInfo::memoryBusWidth() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::l2CacheSize() const
+int cv::cuda::DeviceInfo::l2CacheSize() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -873,7 +873,7 @@ int cv::gpu::DeviceInfo::l2CacheSize() const
 #endif
 }
 
-int cv::gpu::DeviceInfo::maxThreadsPerMultiProcessor() const
+int cv::cuda::DeviceInfo::maxThreadsPerMultiProcessor() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -883,7 +883,7 @@ int cv::gpu::DeviceInfo::maxThreadsPerMultiProcessor() const
 #endif
 }
 
-void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
+void cv::cuda::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
 {
 #ifndef HAVE_CUDA
     (void) _totalMemory;
@@ -901,7 +901,7 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory)
 #endif
 }
 
-bool cv::gpu::DeviceInfo::isCompatible() const
+bool cv::cuda::DeviceInfo::isCompatible() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -951,7 +951,7 @@ namespace
 
 #endif
 
-void cv::gpu::printCudaDeviceInfo(int device)
+void cv::cuda::printCudaDeviceInfo(int device)
 {
 #ifndef HAVE_CUDA
     (void) device;
@@ -1037,7 +1037,7 @@ void cv::gpu::printCudaDeviceInfo(int device)
 #endif
 }
 
-void cv::gpu::printShortCudaDeviceInfo(int device)
+void cv::cuda::printShortCudaDeviceInfo(int device)
 {
 #ifndef HAVE_CUDA
     (void) device;
@@ -1251,7 +1251,7 @@ namespace
 
 #endif
 
-String cv::gpu::getNppErrorMessage(int code)
+String cv::cuda::getNppErrorMessage(int code)
 {
 #ifndef HAVE_CUDA
     (void) code;
@@ -1261,7 +1261,7 @@ String cv::gpu::getNppErrorMessage(int code)
 #endif
 }
 
-String cv::gpu::getCudaDriverApiErrorMessage(int code)
+String cv::cuda::getCudaDriverApiErrorMessage(int code)
 {
 #ifndef HAVE_CUDA
     (void) code;
diff --git a/modules/core/src/gpu_mat.cpp b/modules/core/src/gpu_mat.cpp
index 33a6046fed..2303f11ee8 100644
--- a/modules/core/src/gpu_mat.cpp
+++ b/modules/core/src/gpu_mat.cpp
@@ -44,9 +44,9 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
-cv::gpu::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
+cv::cuda::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
     flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(rows_), cols(cols_),
     step(step_), data((uchar*)data_), refcount(0),
     datastart((uchar*)data_), dataend((uchar*)data_)
@@ -71,7 +71,7 @@ cv::gpu::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t ste
     dataend += step * (rows - 1) + minstep;
 }
 
-cv::gpu::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
+cv::cuda::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
     flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(size_.height), cols(size_.width),
     step(step_), data((uchar*)data_), refcount(0),
     datastart((uchar*)data_), dataend((uchar*)data_)
@@ -95,7 +95,7 @@ cv::gpu::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
     dataend += step * (rows - 1) + minstep;
 }
 
-cv::gpu::GpuMat::GpuMat(const GpuMat& m, Range rowRange_, Range colRange_)
+cv::cuda::GpuMat::GpuMat(const GpuMat& m, Range rowRange_, Range colRange_)
 {
     flags = m.flags;
     step = m.step; refcount = m.refcount;
@@ -136,7 +136,7 @@ cv::gpu::GpuMat::GpuMat(const GpuMat& m, Range rowRange_, Range colRange_)
         rows = cols = 0;
 }
 
-cv::gpu::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
+cv::cuda::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
     flags(m.flags), rows(roi.height), cols(roi.width),
     step(m.step), data(m.data + roi.y*step), refcount(m.refcount),
     datastart(m.datastart), dataend(m.dataend)
@@ -153,7 +153,7 @@ cv::gpu::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
         rows = cols = 0;
 }
 
-GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
+GpuMat cv::cuda::GpuMat::reshape(int new_cn, int new_rows) const
 {
     GpuMat hdr = *this;
 
@@ -196,7 +196,7 @@ GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
     return hdr;
 }
 
-void cv::gpu::GpuMat::locateROI(Size& wholeSize, Point& ofs) const
+void cv::cuda::GpuMat::locateROI(Size& wholeSize, Point& ofs) const
 {
     CV_DbgAssert( step > 0 );
 
@@ -222,7 +222,7 @@ void cv::gpu::GpuMat::locateROI(Size& wholeSize, Point& ofs) const
     wholeSize.width = std::max(static_cast<int>((delta2 - step * (wholeSize.height - 1)) / esz), ofs.x + cols);
 }
 
-GpuMat& cv::gpu::GpuMat::adjustROI(int dtop, int dbottom, int dleft, int dright)
+GpuMat& cv::cuda::GpuMat::adjustROI(int dtop, int dbottom, int dleft, int dright)
 {
     Size wholeSize;
     Point ofs;
@@ -262,7 +262,7 @@ namespace
     }
 }
 
-void cv::gpu::createContinuous(int rows, int cols, int type, OutputArray arr)
+void cv::cuda::createContinuous(int rows, int cols, int type, OutputArray arr)
 {
     switch (arr.kind())
     {
@@ -316,7 +316,7 @@ namespace
     }
 }
 
-void cv::gpu::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
+void cv::cuda::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
 {
     switch (arr.kind())
     {
@@ -337,7 +337,7 @@ void cv::gpu::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
     }
 }
 
-GpuMat cv::gpu::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
+GpuMat cv::cuda::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
 {
     if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
         return mat(Rect(0, 0, cols, rows));
@@ -347,7 +347,7 @@ GpuMat cv::gpu::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
 
 #ifndef HAVE_CUDA
 
-void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
+void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
 {
     (void) _rows;
     (void) _cols;
@@ -355,50 +355,50 @@ void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
     throw_no_cuda();
 }
 
-void cv::gpu::GpuMat::release()
+void cv::cuda::GpuMat::release()
 {
 }
 
-void cv::gpu::GpuMat::upload(InputArray arr)
+void cv::cuda::GpuMat::upload(InputArray arr)
 {
     (void) arr;
     throw_no_cuda();
 }
 
-void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream)
+void cv::cuda::GpuMat::upload(InputArray arr, Stream& _stream)
 {
     (void) arr;
     (void) _stream;
     throw_no_cuda();
 }
 
-void cv::gpu::GpuMat::download(OutputArray _dst) const
+void cv::cuda::GpuMat::download(OutputArray _dst) const
 {
     (void) _dst;
     throw_no_cuda();
 }
 
-void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const
+void cv::cuda::GpuMat::download(OutputArray _dst, Stream& _stream) const
 {
     (void) _dst;
     (void) _stream;
     throw_no_cuda();
 }
 
-void cv::gpu::GpuMat::copyTo(OutputArray _dst) const
+void cv::cuda::GpuMat::copyTo(OutputArray _dst) const
 {
     (void) _dst;
     throw_no_cuda();
 }
 
-void cv::gpu::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const
+void cv::cuda::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const
 {
     (void) _dst;
     (void) _stream;
     throw_no_cuda();
 }
 
-void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& _stream) const
+void cv::cuda::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& _stream) const
 {
     (void) _dst;
     (void) _mask;
@@ -406,7 +406,7 @@ void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& _stream
     throw_no_cuda();
 }
 
-GpuMat& cv::gpu::GpuMat::setTo(Scalar s, Stream& _stream)
+GpuMat& cv::cuda::GpuMat::setTo(Scalar s, Stream& _stream)
 {
     (void) s;
     (void) _stream;
@@ -414,7 +414,7 @@ GpuMat& cv::gpu::GpuMat::setTo(Scalar s, Stream& _stream)
     return *this;
 }
 
-GpuMat& cv::gpu::GpuMat::setTo(Scalar s, InputArray _mask, Stream& _stream)
+GpuMat& cv::cuda::GpuMat::setTo(Scalar s, InputArray _mask, Stream& _stream)
 {
     (void) s;
     (void) _mask;
@@ -423,7 +423,7 @@ GpuMat& cv::gpu::GpuMat::setTo(Scalar s, InputArray _mask, Stream& _stream)
     return *this;
 }
 
-void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& _stream) const
+void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& _stream) const
 {
     (void) _dst;
     (void) rtype;
@@ -431,7 +431,7 @@ void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& _stream) co
     throw_no_cuda();
 }
 
-void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& _stream) const
+void cv::cuda::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& _stream) const
 {
     (void) _dst;
     (void) rtype;
diff --git a/modules/core/src/gpu_stream.cpp b/modules/core/src/gpu_stream.cpp
index 879775355c..27fd6628cc 100644
--- a/modules/core/src/gpu_stream.cpp
+++ b/modules/core/src/gpu_stream.cpp
@@ -43,14 +43,14 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 ////////////////////////////////////////////////////////////////
 // Stream
 
 #ifndef HAVE_CUDA
 
-class cv::gpu::Stream::Impl
+class cv::cuda::Stream::Impl
 {
 public:
     Impl(void* ptr = 0)
@@ -62,7 +62,7 @@ public:
 
 #else
 
-class cv::gpu::Stream::Impl
+class cv::cuda::Stream::Impl
 {
 public:
     cudaStream_t stream;
@@ -73,29 +73,29 @@ public:
     ~Impl();
 };
 
-cv::gpu::Stream::Impl::Impl() : stream(0)
+cv::cuda::Stream::Impl::Impl() : stream(0)
 {
     cudaSafeCall( cudaStreamCreate(&stream) );
 }
 
-cv::gpu::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_)
+cv::cuda::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_)
 {
 }
 
-cv::gpu::Stream::Impl::~Impl()
+cv::cuda::Stream::Impl::~Impl()
 {
     if (stream)
         cudaStreamDestroy(stream);
 }
 
-cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
+cudaStream_t cv::cuda::StreamAccessor::getStream(const Stream& stream)
 {
     return stream.impl_->stream;
 }
 
 #endif
 
-cv::gpu::Stream::Stream()
+cv::cuda::Stream::Stream()
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -104,7 +104,7 @@ cv::gpu::Stream::Stream()
 #endif
 }
 
-bool cv::gpu::Stream::queryIfComplete() const
+bool cv::cuda::Stream::queryIfComplete() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -120,7 +120,7 @@ bool cv::gpu::Stream::queryIfComplete() const
 #endif
 }
 
-void cv::gpu::Stream::waitForCompletion()
+void cv::cuda::Stream::waitForCompletion()
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -129,7 +129,7 @@ void cv::gpu::Stream::waitForCompletion()
 #endif
 }
 
-void cv::gpu::Stream::waitEvent(const Event& event)
+void cv::cuda::Stream::waitEvent(const Event& event)
 {
 #ifndef HAVE_CUDA
     (void) event;
@@ -161,7 +161,7 @@ namespace
 
 #endif
 
-void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
+void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
 {
 #ifndef HAVE_CUDA
     (void) callback;
@@ -180,13 +180,13 @@ void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userDat
 #endif
 }
 
-Stream& cv::gpu::Stream::Null()
+Stream& cv::cuda::Stream::Null()
 {
     static Stream s(new Impl(0));
     return s;
 }
 
-cv::gpu::Stream::operator bool_type() const
+cv::cuda::Stream::operator bool_type() const
 {
 #ifndef HAVE_CUDA
     return 0;
@@ -205,7 +205,7 @@ template <> void cv::Ptr<Stream::Impl>::delete_obj()
 
 #ifndef HAVE_CUDA
 
-class cv::gpu::Event::Impl
+class cv::cuda::Event::Impl
 {
 public:
     Impl(unsigned int)
@@ -216,7 +216,7 @@ public:
 
 #else
 
-class cv::gpu::Event::Impl
+class cv::cuda::Event::Impl
 {
 public:
     cudaEvent_t event;
@@ -225,25 +225,25 @@ public:
     ~Impl();
 };
 
-cv::gpu::Event::Impl::Impl(unsigned int flags) : event(0)
+cv::cuda::Event::Impl::Impl(unsigned int flags) : event(0)
 {
     cudaSafeCall( cudaEventCreateWithFlags(&event, flags) );
 }
 
-cv::gpu::Event::Impl::~Impl()
+cv::cuda::Event::Impl::~Impl()
 {
     if (event)
         cudaEventDestroy(event);
 }
 
-cudaEvent_t cv::gpu::EventAccessor::getEvent(const Event& event)
+cudaEvent_t cv::cuda::EventAccessor::getEvent(const Event& event)
 {
     return event.impl_->event;
 }
 
 #endif
 
-cv::gpu::Event::Event(CreateFlags flags)
+cv::cuda::Event::Event(CreateFlags flags)
 {
 #ifndef HAVE_CUDA
     (void) flags;
@@ -253,7 +253,7 @@ cv::gpu::Event::Event(CreateFlags flags)
 #endif
 }
 
-void cv::gpu::Event::record(Stream& stream)
+void cv::cuda::Event::record(Stream& stream)
 {
 #ifndef HAVE_CUDA
     (void) stream;
@@ -263,7 +263,7 @@ void cv::gpu::Event::record(Stream& stream)
 #endif
 }
 
-bool cv::gpu::Event::queryIfComplete() const
+bool cv::cuda::Event::queryIfComplete() const
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -279,7 +279,7 @@ bool cv::gpu::Event::queryIfComplete() const
 #endif
 }
 
-void cv::gpu::Event::waitForCompletion()
+void cv::cuda::Event::waitForCompletion()
 {
 #ifndef HAVE_CUDA
     throw_no_cuda();
@@ -288,7 +288,7 @@ void cv::gpu::Event::waitForCompletion()
 #endif
 }
 
-float cv::gpu::Event::elapsedTime(const Event& start, const Event& end)
+float cv::cuda::Event::elapsedTime(const Event& start, const Event& end)
 {
 #ifndef HAVE_CUDA
     (void) start;
diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp
index d2032b2e5c..8bfa925574 100644
--- a/modules/core/src/matrix.cpp
+++ b/modules/core/src/matrix.cpp
@@ -943,9 +943,9 @@ _InputArray::_InputArray(const Mat& m) : flags(MAT), obj((void*)&m) {}
 _InputArray::_InputArray(const std::vector<Mat>& vec) : flags(STD_VECTOR_MAT), obj((void*)&vec) {}
 _InputArray::_InputArray(const double& val) : flags(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F), obj((void*)&val), sz(Size(1,1)) {}
 _InputArray::_InputArray(const MatExpr& expr) : flags(FIXED_TYPE + FIXED_SIZE + EXPR), obj((void*)&expr) {}
-_InputArray::_InputArray(const gpu::GpuMat& d_mat) : flags(GPU_MAT), obj((void*)&d_mat) {}
+_InputArray::_InputArray(const cuda::GpuMat& d_mat) : flags(GPU_MAT), obj((void*)&d_mat) {}
 _InputArray::_InputArray(const ogl::Buffer& buf) : flags(OPENGL_BUFFER), obj((void*)&buf) {}
-_InputArray::_InputArray(const gpu::CudaMem& cuda_mem) : flags(CUDA_MEM), obj((void*)&cuda_mem) {}
+_InputArray::_InputArray(const cuda::CudaMem& cuda_mem) : flags(CUDA_MEM), obj((void*)&cuda_mem) {}
 
 _InputArray::~_InputArray() {}
 
@@ -1018,7 +1018,7 @@ Mat _InputArray::getMat(int i) const
     if( k == GPU_MAT )
     {
         CV_Assert( i < 0 );
-        CV_Error(cv::Error::StsNotImplemented, "You should explicitly call download method for gpu::GpuMat object");
+        CV_Error(cv::Error::StsNotImplemented, "You should explicitly call download method for cuda::GpuMat object");
         return Mat();
     }
 
@@ -1027,7 +1027,7 @@ Mat _InputArray::getMat(int i) const
     {
         CV_Assert( i < 0 );
 
-        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
+        const cuda::CudaMem* cuda_mem = (const cuda::CudaMem*)obj;
 
         return cuda_mem->createMatHeader();
     }
@@ -1120,33 +1120,33 @@ void _InputArray::getMatVector(std::vector<Mat>& mv) const
     }
 }
 
-gpu::GpuMat _InputArray::getGpuMat() const
+cuda::GpuMat _InputArray::getGpuMat() const
 {
     int k = kind();
 
     if (k == GPU_MAT)
     {
-        const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
+        const cuda::GpuMat* d_mat = (const cuda::GpuMat*)obj;
         return *d_mat;
     }
 
     if (k == CUDA_MEM)
     {
-        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
+        const cuda::CudaMem* cuda_mem = (const cuda::CudaMem*)obj;
         return cuda_mem->createGpuMatHeader();
     }
 
     if (k == OPENGL_BUFFER)
     {
         CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapDevice/unmapDevice methods for ogl::Buffer object");
-        return gpu::GpuMat();
+        return cuda::GpuMat();
     }
 
     if (k == NONE)
-        return gpu::GpuMat();
+        return cuda::GpuMat();
 
-    CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for gpu::GpuMat and gpu::CudaMem");
-    return gpu::GpuMat();
+    CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for cuda::GpuMat and cuda::CudaMem");
+    return cuda::GpuMat();
 }
 
 ogl::Buffer _InputArray::getOGlBuffer() const
@@ -1230,7 +1230,7 @@ Size _InputArray::size(int i) const
     if( k == GPU_MAT )
     {
         CV_Assert( i < 0 );
-        const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
+        const cuda::GpuMat* d_mat = (const cuda::GpuMat*)obj;
         return d_mat->size();
     }
 
@@ -1243,7 +1243,7 @@ Size _InputArray::size(int i) const
     //if( k == CUDA_MEM )
     {
         CV_Assert( i < 0 );
-        const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
+        const cuda::CudaMem* cuda_mem = (const cuda::CudaMem*)obj;
         return cuda_mem->size();
     }
 }
@@ -1299,11 +1299,11 @@ int _InputArray::type(int i) const
         return ((const ogl::Buffer*)obj)->type();
 
     if( k == GPU_MAT )
-        return ((const gpu::GpuMat*)obj)->type();
+        return ((const cuda::GpuMat*)obj)->type();
 
     CV_Assert( k == CUDA_MEM );
     //if( k == CUDA_MEM )
-        return ((const gpu::CudaMem*)obj)->type();
+        return ((const cuda::CudaMem*)obj)->type();
 }
 
 int _InputArray::depth(int i) const
@@ -1359,26 +1359,26 @@ bool _InputArray::empty() const
     }
 
     if( k == GPU_MAT )
-        return ((const gpu::GpuMat*)obj)->empty();
+        return ((const cuda::GpuMat*)obj)->empty();
 
     CV_Assert( k == CUDA_MEM );
     //if( k == CUDA_MEM )
-        return ((const gpu::CudaMem*)obj)->empty();
+        return ((const cuda::CudaMem*)obj)->empty();
 }
 
 
 _OutputArray::_OutputArray() {}
 _OutputArray::_OutputArray(Mat& m) : _InputArray(m) {}
 _OutputArray::_OutputArray(std::vector<Mat>& vec) : _InputArray(vec) {}
-_OutputArray::_OutputArray(gpu::GpuMat& d_mat) : _InputArray(d_mat) {}
+_OutputArray::_OutputArray(cuda::GpuMat& d_mat) : _InputArray(d_mat) {}
 _OutputArray::_OutputArray(ogl::Buffer& buf) : _InputArray(buf) {}
-_OutputArray::_OutputArray(gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {}
+_OutputArray::_OutputArray(cuda::CudaMem& cuda_mem) : _InputArray(cuda_mem) {}
 
 _OutputArray::_OutputArray(const Mat& m) : _InputArray(m) {flags |= FIXED_SIZE|FIXED_TYPE;}
 _OutputArray::_OutputArray(const std::vector<Mat>& vec) : _InputArray(vec) {flags |= FIXED_SIZE;}
-_OutputArray::_OutputArray(const gpu::GpuMat& d_mat) : _InputArray(d_mat) {flags |= FIXED_SIZE|FIXED_TYPE;}
+_OutputArray::_OutputArray(const cuda::GpuMat& d_mat) : _InputArray(d_mat) {flags |= FIXED_SIZE|FIXED_TYPE;}
 _OutputArray::_OutputArray(const ogl::Buffer& buf) : _InputArray(buf) {flags |= FIXED_SIZE|FIXED_TYPE;}
-_OutputArray::_OutputArray(const gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {flags |= FIXED_SIZE|FIXED_TYPE;}
+_OutputArray::_OutputArray(const cuda::CudaMem& cuda_mem) : _InputArray(cuda_mem) {flags |= FIXED_SIZE|FIXED_TYPE;}
 
 _OutputArray::~_OutputArray() {}
 
@@ -1404,9 +1404,9 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
     }
     if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
     {
-        CV_Assert(!fixedSize() || ((gpu::GpuMat*)obj)->size() == _sz);
-        CV_Assert(!fixedType() || ((gpu::GpuMat*)obj)->type() == mtype);
-        ((gpu::GpuMat*)obj)->create(_sz, mtype);
+        CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == _sz);
+        CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
+        ((cuda::GpuMat*)obj)->create(_sz, mtype);
         return;
     }
     if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 )
@@ -1418,9 +1418,9 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
     }
     if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
     {
-        CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == _sz);
-        CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype);
-        ((gpu::CudaMem*)obj)->create(_sz, mtype);
+        CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == _sz);
+        CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype);
+        ((cuda::CudaMem*)obj)->create(_sz, mtype);
         return;
     }
     int sizes[] = {_sz.height, _sz.width};
@@ -1439,9 +1439,9 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp
     }
     if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
     {
-        CV_Assert(!fixedSize() || ((gpu::GpuMat*)obj)->size() == Size(cols, rows));
-        CV_Assert(!fixedType() || ((gpu::GpuMat*)obj)->type() == mtype);
-        ((gpu::GpuMat*)obj)->create(rows, cols, mtype);
+        CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(cols, rows));
+        CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
+        ((cuda::GpuMat*)obj)->create(rows, cols, mtype);
         return;
     }
     if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 )
@@ -1453,9 +1453,9 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp
     }
     if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
     {
-        CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == Size(cols, rows));
-        CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype);
-        ((gpu::CudaMem*)obj)->create(rows, cols, mtype);
+        CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == Size(cols, rows));
+        CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype);
+        ((cuda::CudaMem*)obj)->create(rows, cols, mtype);
         return;
     }
     int sizes[] = {rows, cols};
@@ -1678,13 +1678,13 @@ void _OutputArray::release() const
 
     if( k == GPU_MAT )
     {
-        ((gpu::GpuMat*)obj)->release();
+        ((cuda::GpuMat*)obj)->release();
         return;
     }
 
     if( k == CUDA_MEM )
     {
-        ((gpu::CudaMem*)obj)->release();
+        ((cuda::CudaMem*)obj)->release();
         return;
     }
 
@@ -1757,11 +1757,11 @@ Mat& _OutputArray::getMatRef(int i) const
     }
 }
 
-gpu::GpuMat& _OutputArray::getGpuMatRef() const
+cuda::GpuMat& _OutputArray::getGpuMatRef() const
 {
     int k = kind();
     CV_Assert( k == GPU_MAT );
-    return *(gpu::GpuMat*)obj;
+    return *(cuda::GpuMat*)obj;
 }
 
 ogl::Buffer& _OutputArray::getOGlBufferRef() const
@@ -1771,11 +1771,11 @@ ogl::Buffer& _OutputArray::getOGlBufferRef() const
     return *(ogl::Buffer*)obj;
 }
 
-gpu::CudaMem& _OutputArray::getCudaMemRef() const
+cuda::CudaMem& _OutputArray::getCudaMemRef() const
 {
     int k = kind();
     CV_Assert( k == CUDA_MEM );
-    return *(gpu::CudaMem*)obj;
+    return *(cuda::CudaMem*)obj;
 }
 
 static _OutputArray _none;
diff --git a/modules/core/src/opengl.cpp b/modules/core/src/opengl.cpp
index f8a647e8ec..37a78a8705 100644
--- a/modules/core/src/opengl.cpp
+++ b/modules/core/src/opengl.cpp
@@ -50,7 +50,7 @@
 #endif
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 namespace
 {
@@ -122,7 +122,7 @@ namespace
 ////////////////////////////////////////////////////////////////////////
 // setGlDevice
 
-void cv::gpu::setGlDevice(int device)
+void cv::cuda::setGlDevice(int device)
 {
 #ifndef HAVE_OPENGL
     (void) device;
@@ -627,7 +627,7 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
 #endif
 }
 
-void cv::ogl::Buffer::copyFrom(InputArray arr, gpu::Stream& stream, Target target, bool autoRelease)
+void cv::ogl::Buffer::copyFrom(InputArray arr, cuda::Stream& stream, Target target, bool autoRelease)
 {
 #ifndef HAVE_OPENGL
     (void) arr;
@@ -647,7 +647,7 @@ void cv::ogl::Buffer::copyFrom(InputArray arr, gpu::Stream& stream, Target targe
 
         create(dmat.size(), dmat.type(), target, autoRelease);
 
-        impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows, gpu::StreamAccessor::getStream(stream));
+        impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows, cuda::StreamAccessor::getStream(stream));
     #endif
 #endif
 }
@@ -692,7 +692,7 @@ void cv::ogl::Buffer::copyTo(OutputArray arr) const
 #endif
 }
 
-void cv::ogl::Buffer::copyTo(OutputArray arr, gpu::Stream& stream) const
+void cv::ogl::Buffer::copyTo(OutputArray arr, cuda::Stream& stream) const
 {
 #ifndef HAVE_OPENGL
     (void) arr;
@@ -706,7 +706,7 @@ void cv::ogl::Buffer::copyTo(OutputArray arr, gpu::Stream& stream) const
     #else
         arr.create(rows_, cols_, type_);
         GpuMat dmat = arr.getGpuMat();
-        impl_->copyTo(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows, gpu::StreamAccessor::getStream(stream));
+        impl_->copyTo(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows, cuda::StreamAccessor::getStream(stream));
     #endif
 #endif
 }
@@ -794,7 +794,7 @@ void cv::ogl::Buffer::unmapDevice()
 #endif
 }
 
-gpu::GpuMat cv::ogl::Buffer::mapDevice(gpu::Stream& stream)
+cuda::GpuMat cv::ogl::Buffer::mapDevice(cuda::Stream& stream)
 {
 #ifndef HAVE_OPENGL
     (void) stream;
@@ -806,12 +806,12 @@ gpu::GpuMat cv::ogl::Buffer::mapDevice(gpu::Stream& stream)
         throw_no_cuda();
         return GpuMat();
     #else
-        return GpuMat(rows_, cols_, type_, impl_->mapDevice(gpu::StreamAccessor::getStream(stream)));
+        return GpuMat(rows_, cols_, type_, impl_->mapDevice(cuda::StreamAccessor::getStream(stream)));
     #endif
 #endif
 }
 
-void cv::ogl::Buffer::unmapDevice(gpu::Stream& stream)
+void cv::ogl::Buffer::unmapDevice(cuda::Stream& stream)
 {
 #ifndef HAVE_OPENGL
     (void) stream;
@@ -821,7 +821,7 @@ void cv::ogl::Buffer::unmapDevice(gpu::Stream& stream)
         (void) stream;
         throw_no_cuda();
     #else
-        impl_->unmapDevice(gpu::StreamAccessor::getStream(stream));
+        impl_->unmapDevice(cuda::StreamAccessor::getStream(stream));
     #endif
 #endif
 }
diff --git a/modules/cudev/include/opencv2/cudev/common.hpp b/modules/cudev/include/opencv2/cudev/common.hpp
index be79901729..ce2048cede 100644
--- a/modules/cudev/include/opencv2/cudev/common.hpp
+++ b/modules/cudev/include/opencv2/cudev/common.hpp
@@ -52,7 +52,7 @@
 
 namespace cv { namespace cudev {
 
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 // CV_CUDEV_ARCH
 
diff --git a/modules/cudev/test/test_arithm_func.cu b/modules/cudev/test/test_arithm_func.cu
index bb73b04529..c9bc0d8f35 100644
--- a/modules/cudev/test/test_arithm_func.cu
+++ b/modules/cudev/test/test_arithm_func.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/test_arithm_op.cu b/modules/cudev/test/test_arithm_op.cu
index a904c54c7f..d4dca64d7c 100644
--- a/modules/cudev/test/test_arithm_op.cu
+++ b/modules/cudev/test/test_arithm_op.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/test_bitwize_op.cu b/modules/cudev/test/test_bitwize_op.cu
index 908d46d66a..6936f57485 100644
--- a/modules/cudev/test/test_bitwize_op.cu
+++ b/modules/cudev/test/test_bitwize_op.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/test_cmp_op.cu b/modules/cudev/test/test_cmp_op.cu
index 4d557b6528..19933723da 100644
--- a/modules/cudev/test/test_cmp_op.cu
+++ b/modules/cudev/test/test_cmp_op.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/test_color_cvt.cu b/modules/cudev/test/test_color_cvt.cu
index 70d904b49e..62cd49ca6b 100644
--- a/modules/cudev/test/test_color_cvt.cu
+++ b/modules/cudev/test/test_color_cvt.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/test_cvt.cu b/modules/cudev/test/test_cvt.cu
index bdfa493bdc..b1c3d10f66 100644
--- a/modules/cudev/test/test_cvt.cu
+++ b/modules/cudev/test/test_cvt.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/test_deriv.cu b/modules/cudev/test/test_deriv.cu
index 8ef9fb76aa..2001b7fdee 100644
--- a/modules/cudev/test/test_deriv.cu
+++ b/modules/cudev/test/test_deriv.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/test_integral.cu b/modules/cudev/test/test_integral.cu
index 190fc354ac..3c34ffcc05 100644
--- a/modules/cudev/test/test_integral.cu
+++ b/modules/cudev/test/test_integral.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/test_lut.cu b/modules/cudev/test/test_lut.cu
index d2548ec24b..62c3129a98 100644
--- a/modules/cudev/test/test_lut.cu
+++ b/modules/cudev/test/test_lut.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/test_pyramids.cu b/modules/cudev/test/test_pyramids.cu
index c196c923ca..28678b8d62 100644
--- a/modules/cudev/test/test_pyramids.cu
+++ b/modules/cudev/test/test_pyramids.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/test_reduction.cu b/modules/cudev/test/test_reduction.cu
index 22cadbebc4..03c78def15 100644
--- a/modules/cudev/test/test_reduction.cu
+++ b/modules/cudev/test/test_reduction.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/test_split_merge.cu b/modules/cudev/test/test_split_merge.cu
index 3af24f5882..b25c8b96d6 100644
--- a/modules/cudev/test/test_split_merge.cu
+++ b/modules/cudev/test/test_split_merge.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/test_warp.cu b/modules/cudev/test/test_warp.cu
index 8777867bfc..eda1694860 100644
--- a/modules/cudev/test/test_warp.cu
+++ b/modules/cudev/test/test_warp.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/cudev/test/transpose.cu b/modules/cudev/test/transpose.cu
index a1e477c225..515eedfc34 100644
--- a/modules/cudev/test/transpose.cu
+++ b/modules/cudev/test/transpose.cu
@@ -44,7 +44,7 @@
 #include "test_precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::cudev;
 using namespace cvtest;
 
diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp
index 1b0120ba25..ec59f57602 100644
--- a/modules/gpu/include/opencv2/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu.hpp
@@ -89,7 +89,7 @@
     #endif
 #endif
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
 
@@ -255,6 +255,6 @@ CV_EXPORTS void calcWobbleSuppressionMaps(
         int left, int idx, int right, Size size, const Mat &ml, const Mat &mr,
         GpuMat &mapx, GpuMat &mapy);
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 #endif /* __OPENCV_GPU_HPP__ */
diff --git a/modules/gpu/perf/perf_calib3d.cpp b/modules/gpu/perf/perf_calib3d.cpp
index 185d9cd684..761ff3d1a3 100644
--- a/modules/gpu/perf/perf_calib3d.cpp
+++ b/modules/gpu/perf/perf_calib3d.cpp
@@ -65,10 +65,10 @@ PERF_TEST_P(Count, Calib3D_ProjectPoints,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), dst);
+        TEST_CYCLE() cv::cuda::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -120,7 +120,7 @@ PERF_TEST_P(Count, Calib3D_SolvePnPRansac,
 
     if (PERF_RUN_GPU())
     {
-        TEST_CYCLE() cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
+        TEST_CYCLE() cv::cuda::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
 
         GPU_SANITY_CHECK(rvec, 1e-3);
         GPU_SANITY_CHECK(tvec, 1e-3);
diff --git a/modules/gpu/perf/perf_labeling.cpp b/modules/gpu/perf/perf_labeling.cpp
index 0484da9d59..e6cd26f07d 100644
--- a/modules/gpu/perf/perf_labeling.cpp
+++ b/modules/gpu/perf/perf_labeling.cpp
@@ -151,10 +151,10 @@ PERF_TEST_P(Image, DISABLED_Labeling_ConnectivityMask,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_image(image);
-        cv::gpu::GpuMat mask;
+        cv::cuda::GpuMat d_image(image);
+        cv::cuda::GpuMat mask;
 
-        TEST_CYCLE() cv::gpu::connectivityMask(d_image, mask, cv::Scalar::all(0), cv::Scalar::all(2));
+        TEST_CYCLE() cv::cuda::connectivityMask(d_image, mask, cv::Scalar::all(0), cv::Scalar::all(2));
 
         GPU_SANITY_CHECK(mask);
     }
@@ -174,12 +174,12 @@ PERF_TEST_P(Image, DISABLED_Labeling_ConnectedComponents,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_mask;
-        cv::gpu::connectivityMask(cv::gpu::GpuMat(image), d_mask, cv::Scalar::all(0), cv::Scalar::all(2));
+        cv::cuda::GpuMat d_mask;
+        cv::cuda::connectivityMask(cv::cuda::GpuMat(image), d_mask, cv::Scalar::all(0), cv::Scalar::all(2));
 
-        cv::gpu::GpuMat components;
+        cv::cuda::GpuMat components;
 
-        TEST_CYCLE() cv::gpu::labelComponents(d_mask, components);
+        TEST_CYCLE() cv::cuda::labelComponents(d_mask, components);
 
         GPU_SANITY_CHECK(components);
     }
diff --git a/modules/gpu/perf/perf_matop.cpp b/modules/gpu/perf/perf_matop.cpp
index f80ba1b087..42b3bc1904 100644
--- a/modules/gpu/perf/perf_matop.cpp
+++ b/modules/gpu/perf/perf_matop.cpp
@@ -64,7 +64,7 @@ PERF_TEST_P(Sz_Depth_Cn, MatOp_SetTo,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat dst(size, type);
+        cv::cuda::GpuMat dst(size, type);
 
         TEST_CYCLE() dst.setTo(val);
 
@@ -102,8 +102,8 @@ PERF_TEST_P(Sz_Depth_Cn, MatOp_SetToMasked,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat dst(src);
-        const cv::gpu::GpuMat d_mask(mask);
+        cv::cuda::GpuMat dst(src);
+        const cv::cuda::GpuMat d_mask(mask);
 
         TEST_CYCLE() dst.setTo(val, d_mask);
 
@@ -139,9 +139,9 @@ PERF_TEST_P(Sz_Depth_Cn, MatOp_CopyToMasked,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        const cv::gpu::GpuMat d_mask(mask);
-        cv::gpu::GpuMat dst(d_src.size(), d_src.type(), cv::Scalar::all(0));
+        const cv::cuda::GpuMat d_src(src);
+        const cv::cuda::GpuMat d_mask(mask);
+        cv::cuda::GpuMat dst(d_src.size(), d_src.type(), cv::Scalar::all(0));
 
         TEST_CYCLE() d_src.copyTo(dst, d_mask);
 
@@ -179,8 +179,8 @@ PERF_TEST_P(Sz_2Depth, MatOp_ConvertTo,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
         TEST_CYCLE() d_src.convertTo(dst, depth2, a, b);
 
diff --git a/modules/gpu/perf/perf_objdetect.cpp b/modules/gpu/perf/perf_objdetect.cpp
index 1516d6b99b..b2d75c816a 100644
--- a/modules/gpu/perf/perf_objdetect.cpp
+++ b/modules/gpu/perf/perf_objdetect.cpp
@@ -68,11 +68,11 @@ PERF_TEST_P(Image, ObjDetect_HOG,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_img(img);
+        const cv::cuda::GpuMat d_img(img);
         std::vector<cv::Rect> gpu_found_locations;
 
-        cv::gpu::HOGDescriptor d_hog;
-        d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
+        cv::cuda::HOGDescriptor d_hog;
+        d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
 
         TEST_CYCLE() d_hog.detectMultiScale(d_img, gpu_found_locations);
 
@@ -83,7 +83,7 @@ PERF_TEST_P(Image, ObjDetect_HOG,
         std::vector<cv::Rect> cpu_found_locations;
 
         cv::HOGDescriptor hog;
-        hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
+        hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
 
         TEST_CYCLE() hog.detectMultiScale(img, cpu_found_locations);
 
@@ -105,11 +105,11 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_HaarClassifier,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::CascadeClassifier_GPU d_cascade;
+        cv::cuda::CascadeClassifier_GPU d_cascade;
         ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
 
-        const cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat objects_buffer;
+        const cv::cuda::GpuMat d_img(img);
+        cv::cuda::GpuMat objects_buffer;
         int detections_num = 0;
 
         TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
@@ -144,11 +144,11 @@ PERF_TEST_P(ImageAndCascade, ObjDetect_LBPClassifier,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::CascadeClassifier_GPU d_cascade;
+        cv::cuda::CascadeClassifier_GPU d_cascade;
         ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
 
-        const cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat objects_buffer;
+        const cv::cuda::GpuMat d_img(img);
+        cv::cuda::GpuMat objects_buffer;
         int detections_num = 0;
 
         TEST_CYCLE() detections_num = d_cascade.detectMultiScale(d_img, objects_buffer);
diff --git a/modules/gpu/perf4au/main.cpp b/modules/gpu/perf4au/main.cpp
index 707251c42f..02c102c3aa 100644
--- a/modules/gpu/perf4au/main.cpp
+++ b/modules/gpu/perf4au/main.cpp
@@ -80,10 +80,10 @@ PERF_TEST_P(Image, HoughLinesP, testing::Values(std::string("im1_1280x800.jpg"))
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_image(image);
-        cv::gpu::GpuMat d_lines;
+        cv::cuda::GpuMat d_image(image);
+        cv::cuda::GpuMat d_lines;
 
-        cv::Ptr<cv::gpu::HoughSegmentDetector> hough = cv::gpu::createHoughSegmentDetector(rho, theta, minLineLenght, maxLineGap);
+        cv::Ptr<cv::cuda::HoughSegmentDetector> hough = cv::cuda::createHoughSegmentDetector(rho, theta, minLineLenght, maxLineGap);
 
         hough->detect(d_image, d_lines);
 
@@ -144,11 +144,11 @@ PERF_TEST_P(Image_Depth, GoodFeaturesToTrack,
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::gpu::CornersDetector> detector = cv::gpu::createGoodFeaturesToTrackDetector(src.type(), maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, k);
+        cv::Ptr<cv::cuda::CornersDetector> detector = cv::cuda::createGoodFeaturesToTrackDetector(src.type(), maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, k);
 
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_mask(mask);
-        cv::gpu::GpuMat d_pts;
+        cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat d_mask(mask);
+        cv::cuda::GpuMat d_pts;
 
         detector->detect(d_src, d_pts, d_mask);
 
@@ -233,13 +233,13 @@ PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_pts(pts.reshape(2, 1));
-        cv::gpu::GpuMat d_nextPts;
-        cv::gpu::GpuMat d_status;
+        cv::cuda::GpuMat d_src1(src1);
+        cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat d_pts(pts.reshape(2, 1));
+        cv::cuda::GpuMat d_nextPts;
+        cv::cuda::GpuMat d_status;
 
-        cv::gpu::PyrLKOpticalFlow d_pyrLK;
+        cv::cuda::PyrLKOpticalFlow d_pyrLK;
         d_pyrLK.winSize = winSize;
         d_pyrLK.maxLevel = maxLevel;
         d_pyrLK.iters = criteria.maxCount;
@@ -311,12 +311,12 @@ PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_u(src1.size(), CV_32FC1, cv::Scalar::all(0));
-        cv::gpu::GpuMat d_v(src1.size(), CV_32FC1, cv::Scalar::all(0));
+        cv::cuda::GpuMat d_src1(src1);
+        cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat d_u(src1.size(), CV_32FC1, cv::Scalar::all(0));
+        cv::cuda::GpuMat d_v(src1.size(), CV_32FC1, cv::Scalar::all(0));
 
-        cv::gpu::FarnebackOpticalFlow d_farneback;
+        cv::cuda::FarnebackOpticalFlow d_farneback;
         d_farneback.pyrScale = pyrScale;
         d_farneback.numLevels = numLevels;
         d_farneback.winSize = winSize;
@@ -398,15 +398,15 @@ PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_velx, d_vely, buf;
+        cv::cuda::GpuMat d_src1(src1);
+        cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat d_velx, d_vely, buf;
 
-        cv::gpu::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf);
+        cv::cuda::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf);
 
         TEST_CYCLE_N(10)
         {
-            cv::gpu::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf);
+            cv::cuda::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf);
         }
     }
     else
@@ -449,11 +449,11 @@ PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_src1(src1);
-        cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_velx, d_vely;
+        cv::cuda::GpuMat d_src1(src1);
+        cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat d_velx, d_vely;
 
-        cv::gpu::FastOpticalFlowBM fastBM;
+        cv::cuda::FastOpticalFlowBM fastBM;
 
         fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width);
 
diff --git a/modules/gpu/src/calib3d.cpp b/modules/gpu/src/calib3d.cpp
index 631174218c..619a2c1329 100644
--- a/modules/gpu/src/calib3d.cpp
+++ b/modules/gpu/src/calib3d.cpp
@@ -43,19 +43,19 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat&, Mat&, bool, int, float, int, std::vector<int>*) { throw_no_cuda(); }
+void cv::cuda::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat&, Mat&, bool, int, float, int, std::vector<int>*) { throw_no_cuda(); }
 
 #else
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace transform_points
     {
@@ -78,7 +78,7 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-using namespace ::cv::gpu::cudev;
+using namespace ::cv::cuda::cudev;
 
 namespace
 {
@@ -97,7 +97,7 @@ namespace
     }
 }
 
-void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
+void cv::cuda::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
 {
     transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
 }
@@ -121,7 +121,7 @@ namespace
     }
 }
 
-void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
+void cv::cuda::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
 {
     projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
 }
@@ -208,7 +208,7 @@ namespace
     };
 }
 
-void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
+void cv::cuda::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
                              const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess,
                              int num_iters, float max_dist, int min_inlier_count,
                              std::vector<int>* inliers)
@@ -252,7 +252,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
     // Find the best hypothesis index
     Point best_idx;
     double best_score;
-    gpu::minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
+    cuda::minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
     int num_inliers = static_cast<int>(best_score);
 
     // Extract the best hypothesis data
diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp
index 74867b48dd..84a45b7639 100644
--- a/modules/gpu/src/cascadeclassifier.cpp
+++ b/modules/gpu/src/cascadeclassifier.cpp
@@ -44,23 +44,23 @@
 #include "opencv2/objdetect/objdetect_c.h"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU()               { throw_no_cuda(); }
-cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const String&)  { throw_no_cuda(); }
-cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU()              { throw_no_cuda(); }
-bool cv::gpu::CascadeClassifier_GPU::empty() const                    { throw_no_cuda(); return true; }
-bool cv::gpu::CascadeClassifier_GPU::load(const String&)              { throw_no_cuda(); return true; }
-Size cv::gpu::CascadeClassifier_GPU::getClassifierSize() const        { throw_no_cuda(); return Size();}
-void cv::gpu::CascadeClassifier_GPU::release()                        { throw_no_cuda(); }
-int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat&, GpuMat&, double, int, Size)       {throw_no_cuda(); return -1;}
-int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat&, GpuMat&, Size, Size, double, int) {throw_no_cuda(); return -1;}
+cv::cuda::CascadeClassifier_GPU::CascadeClassifier_GPU()               { throw_no_cuda(); }
+cv::cuda::CascadeClassifier_GPU::CascadeClassifier_GPU(const String&)  { throw_no_cuda(); }
+cv::cuda::CascadeClassifier_GPU::~CascadeClassifier_GPU()              { throw_no_cuda(); }
+bool cv::cuda::CascadeClassifier_GPU::empty() const                    { throw_no_cuda(); return true; }
+bool cv::cuda::CascadeClassifier_GPU::load(const String&)              { throw_no_cuda(); return true; }
+Size cv::cuda::CascadeClassifier_GPU::getClassifierSize() const        { throw_no_cuda(); return Size();}
+void cv::cuda::CascadeClassifier_GPU::release()                        { throw_no_cuda(); }
+int cv::cuda::CascadeClassifier_GPU::detectMultiScale( const GpuMat&, GpuMat&, double, int, Size)       {throw_no_cuda(); return -1;}
+int cv::cuda::CascadeClassifier_GPU::detectMultiScale( const GpuMat&, GpuMat&, Size, Size, double, int) {throw_no_cuda(); return -1;}
 
 #else
 
-struct cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
+struct cv::cuda::CascadeClassifier_GPU::CascadeClassifierImpl
 {
 public:
     CascadeClassifierImpl(){}
@@ -75,7 +75,7 @@ public:
 
 #ifndef HAVE_OPENCV_GPULEGACY
 
-struct cv::gpu::CascadeClassifier_GPU::HaarCascade : cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
+struct cv::cuda::CascadeClassifier_GPU::HaarCascade : cv::cuda::CascadeClassifier_GPU::CascadeClassifierImpl
 {
 public:
     HaarCascade()
@@ -104,7 +104,7 @@ public:
 
 #else
 
-struct cv::gpu::CascadeClassifier_GPU::HaarCascade : cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
+struct cv::cuda::CascadeClassifier_GPU::HaarCascade : cv::cuda::CascadeClassifier_GPU::CascadeClassifierImpl
 {
 public:
     HaarCascade() : lastAllocatedFrameSize(-1, -1)
@@ -203,7 +203,7 @@ private:
 
     NCVStatus load(const String& classifierFile)
     {
-        int devId = cv::gpu::getDevice();
+        int devId = cv::cuda::getDevice();
         ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);
 
         // Load the classifier from file (assuming its size is about 1 mb) using a simple allocator
@@ -372,7 +372,7 @@ struct PyrLavel
     cv::Size sWindow;
 };
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace lbp
     {
@@ -398,7 +398,7 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-struct cv::gpu::CascadeClassifier_GPU::LbpCascade : cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
+struct cv::cuda::CascadeClassifier_GPU::LbpCascade : cv::cuda::CascadeClassifier_GPU::CascadeClassifierImpl
 {
 public:
     struct Stage
@@ -457,8 +457,8 @@ public:
                 GpuMat buff = integralBuffer;
 
                 // generate integral for scale
-                gpu::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
-                gpu::integral(src, sint, buff);
+                cuda::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
+                cuda::integral(src, sint, buff);
 
                 // calculate job
                 int totalWidth = level.workArea.width / step;
@@ -515,7 +515,7 @@ private:
             roiSize.height = frame.height;
 
             cudaDeviceProp prop;
-            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
+            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
 
             Ncv32u bufSize;
             ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
@@ -694,36 +694,36 @@ private:
     static const int integralFactor = 4;
 };
 
-cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU()
+cv::cuda::CascadeClassifier_GPU::CascadeClassifier_GPU()
 : findLargestObject(false), visualizeInPlace(false), impl(0) {}
 
-cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU(const String& filename)
+cv::cuda::CascadeClassifier_GPU::CascadeClassifier_GPU(const String& filename)
 : findLargestObject(false), visualizeInPlace(false), impl(0) { load(filename); }
 
-cv::gpu::CascadeClassifier_GPU::~CascadeClassifier_GPU() { release(); }
+cv::cuda::CascadeClassifier_GPU::~CascadeClassifier_GPU() { release(); }
 
-void cv::gpu::CascadeClassifier_GPU::release() { if (impl) { delete impl; impl = 0; } }
+void cv::cuda::CascadeClassifier_GPU::release() { if (impl) { delete impl; impl = 0; } }
 
-bool cv::gpu::CascadeClassifier_GPU::empty() const { return impl == 0; }
+bool cv::cuda::CascadeClassifier_GPU::empty() const { return impl == 0; }
 
-Size cv::gpu::CascadeClassifier_GPU::getClassifierSize() const
+Size cv::cuda::CascadeClassifier_GPU::getClassifierSize() const
 {
     return this->empty() ? Size() : impl->getClassifierCvSize();
 }
 
-int cv::gpu::CascadeClassifier_GPU::detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor, int minNeighbors, Size minSize)
+int cv::cuda::CascadeClassifier_GPU::detectMultiScale( const GpuMat& image, GpuMat& objectsBuf, double scaleFactor, int minNeighbors, Size minSize)
 {
     CV_Assert( !this->empty());
     return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, cv::Size());
 }
 
-int cv::gpu::CascadeClassifier_GPU::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize, double scaleFactor, int minNeighbors)
+int cv::cuda::CascadeClassifier_GPU::detectMultiScale(const GpuMat& image, GpuMat& objectsBuf, Size maxObjectSize, Size minSize, double scaleFactor, int minNeighbors)
 {
     CV_Assert( !this->empty());
     return impl->process(image, objectsBuf, (float)scaleFactor, minNeighbors, findLargestObject, visualizeInPlace, minSize, maxObjectSize);
 }
 
-bool cv::gpu::CascadeClassifier_GPU::load(const String& filename)
+bool cv::cuda::CascadeClassifier_GPU::load(const String& filename)
 {
     release();
 
diff --git a/modules/gpu/src/cuda/calib3d.cu b/modules/gpu/src/cuda/calib3d.cu
index 9adc7806f6..d62aaf8486 100644
--- a/modules/gpu/src/cuda/calib3d.cu
+++ b/modules/gpu/src/cuda/calib3d.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "opencv2/core/cuda/reduce.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200
 
@@ -79,7 +79,7 @@ namespace cv { namespace gpu { namespace cudev
             cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
             cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
             cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
-            cv::gpu::cudev::transform(src, dst, TransformOp(), WithOutMask(), stream);
+            cv::cuda::cudev::transform(src, dst, TransformOp(), WithOutMask(), stream);
         }
     } // namespace transform_points
 
@@ -120,7 +120,7 @@ namespace cv { namespace gpu { namespace cudev
             cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
             cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
             cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
-            cv::gpu::cudev::transform(src, dst, ProjectOp(), WithOutMask(), stream);
+            cv::cuda::cudev::transform(src, dst, ProjectOp(), WithOutMask(), stream);
         }
     } // namespace project_points
 
@@ -187,7 +187,7 @@ namespace cv { namespace gpu { namespace cudev
             cudaSafeCall( cudaDeviceSynchronize() );
         }
     } // namespace solvepnp_ransac
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu
index 58ceb99cd8..4323919c80 100644
--- a/modules/gpu/src/cuda/ccomponetns.cu
+++ b/modules/gpu/src/cuda/ccomponetns.cu
@@ -50,7 +50,7 @@
 #include <iostream>
 #include <stdio.h>
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace ccl
     {
diff --git a/modules/gpu/src/cuda/global_motion.cu b/modules/gpu/src/cuda/global_motion.cu
index 5685c6750c..c03adc02fc 100644
--- a/modules/gpu/src/cuda/global_motion.cu
+++ b/modules/gpu/src/cuda/global_motion.cu
@@ -47,7 +47,7 @@
 #include <thrust/functional.h>
 #include "opencv2/core/cuda/common.hpp"
 
-namespace cv { namespace gpu { namespace cudev { namespace globmotion {
+namespace cv { namespace cuda { namespace cudev { namespace globmotion {
 
 __constant__ float cml[9];
 __constant__ float cmr[9];
diff --git a/modules/gpu/src/cuda/hog.cu b/modules/gpu/src/cuda/hog.cu
index 48d656a744..0e20fe244b 100644
--- a/modules/gpu/src/cuda/hog.cu
+++ b/modules/gpu/src/cuda/hog.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     // Other values are not supported
     #define CELL_WIDTH 8
@@ -808,7 +808,7 @@ namespace cv { namespace gpu { namespace cudev
         void resize_8UC1(const PtrStepSzb& src, PtrStepSzb dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
         void resize_8UC4(const PtrStepSzb& src, PtrStepSzb dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }
     } // namespace hog
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpu/src/cuda/lbp.cu b/modules/gpu/src/cuda/lbp.cu
index fb6267f2fe..fada381e46 100644
--- a/modules/gpu/src/cuda/lbp.cu
+++ b/modules/gpu/src/cuda/lbp.cu
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/vec_traits.hpp"
 #include "opencv2/core/cuda/saturate_cast.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace lbp
     {
diff --git a/modules/gpu/src/cuda/lbp.hpp b/modules/gpu/src/cuda/lbp.hpp
index 1bfdc610cb..e6bd0b2e9d 100644
--- a/modules/gpu/src/cuda/lbp.hpp
+++ b/modules/gpu/src/cuda/lbp.hpp
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/emulation.hpp"
 
-namespace cv { namespace gpu { namespace cudev {
+namespace cv { namespace cuda { namespace cudev {
 
 namespace lbp {
 
diff --git a/modules/gpu/src/global_motion.cpp b/modules/gpu/src/global_motion.cpp
index a9cc66954d..2650b948a9 100644
--- a/modules/gpu/src/global_motion.cpp
+++ b/modules/gpu/src/global_motion.cpp
@@ -43,17 +43,17 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::compactPoints(GpuMat&, GpuMat&, const GpuMat&) { throw_no_cuda(); }
-void cv::gpu::calcWobbleSuppressionMaps(
+void cv::cuda::compactPoints(GpuMat&, GpuMat&, const GpuMat&) { throw_no_cuda(); }
+void cv::cuda::calcWobbleSuppressionMaps(
         int, int, int, Size, const Mat&, const Mat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
 
 #else
 
-namespace cv { namespace gpu { namespace cudev { namespace globmotion {
+namespace cv { namespace cuda { namespace cudev { namespace globmotion {
 
     int compactPoints(int N, float *points0, float *points1, const uchar *mask);
 
@@ -63,14 +63,14 @@ namespace cv { namespace gpu { namespace cudev { namespace globmotion {
 
 }}}}
 
-void cv::gpu::compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask)
+void cv::cuda::compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask)
 {
     CV_Assert(points0.rows == 1 && points1.rows == 1 && mask.rows == 1);
     CV_Assert(points0.type() == CV_32FC2 && points1.type() == CV_32FC2 && mask.type() == CV_8U);
     CV_Assert(points0.cols == mask.cols && points1.cols == mask.cols);
 
     int npoints = points0.cols;
-    int remaining = cv::gpu::cudev::globmotion::compactPoints(
+    int remaining = cv::cuda::cudev::globmotion::compactPoints(
             npoints, (float*)points0.data, (float*)points1.data, mask.data);
 
     points0 = points0.colRange(0, remaining);
@@ -78,7 +78,7 @@ void cv::gpu::compactPoints(GpuMat &points0, GpuMat &points1, const GpuMat &mask
 }
 
 
-void cv::gpu::calcWobbleSuppressionMaps(
+void cv::cuda::calcWobbleSuppressionMaps(
         int left, int idx, int right, Size size, const Mat &ml, const Mat &mr,
         GpuMat &mapx, GpuMat &mapy)
 {
@@ -88,7 +88,7 @@ void cv::gpu::calcWobbleSuppressionMaps(
     mapx.create(size, CV_32F);
     mapy.create(size, CV_32F);
 
-    cv::gpu::cudev::globmotion::calcWobbleSuppressionMaps(
+    cv::cuda::cudev::globmotion::calcWobbleSuppressionMaps(
                 left, idx, right, size.width, size.height,
                 ml.ptr<float>(), mr.ptr<float>(), mapx, mapy);
 }
diff --git a/modules/gpu/src/graphcuts.cpp b/modules/gpu/src/graphcuts.cpp
index 40ccd04710..78532f1973 100644
--- a/modules/gpu/src/graphcuts.cpp
+++ b/modules/gpu/src/graphcuts.cpp
@@ -44,15 +44,15 @@
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::connectivityMask(const GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_no_cuda(); }
-void cv::gpu::labelComponents(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::connectivityMask(const GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_no_cuda(); }
+void cv::cuda::labelComponents(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace ccl
     {
@@ -68,7 +68,7 @@ static float4 scalarToCudaType(const cv::Scalar& in)
   return make_float4((float)in[0], (float)in[1], (float)in[2], (float)in[3]);
 }
 
-void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s)
+void cv::cuda::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s)
 {
     CV_Assert(!image.empty());
 
@@ -102,7 +102,7 @@ void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scal
     f(image, mask, culo, cuhi, stream);
 }
 
-void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, int flags, Stream& s)
+void cv::cuda::labelComponents(const GpuMat& mask, GpuMat& components, int flags, Stream& s)
 {
     CV_Assert(!mask.empty() && mask.type() == CV_8U);
 
@@ -142,7 +142,7 @@ namespace
     };
 }
 
-void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s)
+void cv::cuda::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s)
 {
 #if (CUDA_VERSION < 5000)
     CV_Assert(terminals.type() == CV_32S);
@@ -201,7 +201,7 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
         cudaSafeCall( cudaDeviceSynchronize() );
 }
 
-void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
+void cv::cuda::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
               GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight, GpuMat& labels, GpuMat& buf, Stream& s)
 {
 #if (CUDA_VERSION < 5000)
diff --git a/modules/gpu/src/hog.cpp b/modules/gpu/src/hog.cpp
index a599fa8f1b..5a84137ec5 100644
--- a/modules/gpu/src/hog.cpp
+++ b/modules/gpu/src/hog.cpp
@@ -44,25 +44,25 @@
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-cv::gpu::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int) { throw_no_cuda(); }
-size_t cv::gpu::HOGDescriptor::getDescriptorSize() const { throw_no_cuda(); return 0; }
-size_t cv::gpu::HOGDescriptor::getBlockHistogramSize() const { throw_no_cuda(); return 0; }
-double cv::gpu::HOGDescriptor::getWinSigma() const { throw_no_cuda(); return 0; }
-bool cv::gpu::HOGDescriptor::checkDetectorSize() const { throw_no_cuda(); return false; }
-void cv::gpu::HOGDescriptor::setSVMDetector(const std::vector<float>&) { throw_no_cuda(); }
-void cv::gpu::HOGDescriptor::detect(const GpuMat&, std::vector<Point>&, double, Size, Size) { throw_no_cuda(); }
-void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat&, std::vector<Rect>&, double, Size, Size, double, int) { throw_no_cuda(); }
-void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat&) { throw_no_cuda(); }
-void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&, int) { throw_no_cuda(); }
-std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector() { throw_no_cuda(); return std::vector<float>(); }
-std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector48x96() { throw_no_cuda(); return std::vector<float>(); }
-std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector64x128() { throw_no_cuda(); return std::vector<float>(); }
-void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat&, std::vector<Point>&, double, Size, Size, std::vector<Point>&, std::vector<double>&) { throw_no_cuda(); }
-void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat&, std::vector<Rect>&, double, Size, Size, std::vector<HOGConfidence>&, int) { throw_no_cuda(); }
+cv::cuda::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int) { throw_no_cuda(); }
+size_t cv::cuda::HOGDescriptor::getDescriptorSize() const { throw_no_cuda(); return 0; }
+size_t cv::cuda::HOGDescriptor::getBlockHistogramSize() const { throw_no_cuda(); return 0; }
+double cv::cuda::HOGDescriptor::getWinSigma() const { throw_no_cuda(); return 0; }
+bool cv::cuda::HOGDescriptor::checkDetectorSize() const { throw_no_cuda(); return false; }
+void cv::cuda::HOGDescriptor::setSVMDetector(const std::vector<float>&) { throw_no_cuda(); }
+void cv::cuda::HOGDescriptor::detect(const GpuMat&, std::vector<Point>&, double, Size, Size) { throw_no_cuda(); }
+void cv::cuda::HOGDescriptor::detectMultiScale(const GpuMat&, std::vector<Rect>&, double, Size, Size, double, int) { throw_no_cuda(); }
+void cv::cuda::HOGDescriptor::computeBlockHistograms(const GpuMat&) { throw_no_cuda(); }
+void cv::cuda::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&, int) { throw_no_cuda(); }
+std::vector<float> cv::cuda::HOGDescriptor::getDefaultPeopleDetector() { throw_no_cuda(); return std::vector<float>(); }
+std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector48x96() { throw_no_cuda(); return std::vector<float>(); }
+std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector64x128() { throw_no_cuda(); return std::vector<float>(); }
+void cv::cuda::HOGDescriptor::computeConfidence(const GpuMat&, std::vector<Point>&, double, Size, Size, std::vector<Point>&, std::vector<double>&) { throw_no_cuda(); }
+void cv::cuda::HOGDescriptor::computeConfidenceMultiScale(const GpuMat&, std::vector<Rect>&, double, Size, Size, std::vector<HOGConfidence>&, int) { throw_no_cuda(); }
 
 #else
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace hog
     {
@@ -70,8 +70,8 @@ namespace cv { namespace gpu { namespace cudev
                               int nblocks_win_x, int nblocks_win_y);
 
         void compute_hists(int nbins, int block_stride_x, int blovck_stride_y,
-                           int height, int width, const cv::gpu::PtrStepSzf& grad,
-                           const cv::gpu::PtrStepSzb& qangle, float sigma, float* block_hists);
+                           int height, int width, const cv::cuda::PtrStepSzf& grad,
+                           const cv::cuda::PtrStepSzb& qangle, float sigma, float* block_hists);
 
         void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
                              int height, int width, float* block_hists, float threshold);
@@ -87,24 +87,24 @@ namespace cv { namespace gpu { namespace cudev
 
         void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
                                     int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
-                                    cv::gpu::PtrStepSzf descriptors);
+                                    cv::cuda::PtrStepSzf descriptors);
         void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
                                     int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
-                                    cv::gpu::PtrStepSzf descriptors);
+                                    cv::cuda::PtrStepSzf descriptors);
 
-        void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::PtrStepSzb& img,
-                                    float angle_scale, cv::gpu::PtrStepSzf grad, cv::gpu::PtrStepSzb qangle, bool correct_gamma);
-        void compute_gradients_8UC4(int nbins, int height, int width, const cv::gpu::PtrStepSzb& img,
-                                    float angle_scale, cv::gpu::PtrStepSzf grad, cv::gpu::PtrStepSzb qangle, bool correct_gamma);
+        void compute_gradients_8UC1(int nbins, int height, int width, const cv::cuda::PtrStepSzb& img,
+                                    float angle_scale, cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle, bool correct_gamma);
+        void compute_gradients_8UC4(int nbins, int height, int width, const cv::cuda::PtrStepSzb& img,
+                                    float angle_scale, cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle, bool correct_gamma);
 
-        void resize_8UC1(const cv::gpu::PtrStepSzb& src, cv::gpu::PtrStepSzb dst);
-        void resize_8UC4(const cv::gpu::PtrStepSzb& src, cv::gpu::PtrStepSzb dst);
+        void resize_8UC1(const cv::cuda::PtrStepSzb& src, cv::cuda::PtrStepSzb dst);
+        void resize_8UC4(const cv::cuda::PtrStepSzb& src, cv::cuda::PtrStepSzb dst);
     }
 }}}
 
-using namespace ::cv::gpu::cudev;
+using namespace ::cv::cuda::cudev;
 
-cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_,
+cv::cuda::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_,
                                       int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_)
         : win_size(win_size_),
           block_size(block_size_),
@@ -132,30 +132,30 @@ cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
     hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height);
 }
 
-size_t cv::gpu::HOGDescriptor::getDescriptorSize() const
+size_t cv::cuda::HOGDescriptor::getDescriptorSize() const
 {
     return numPartsWithin(win_size, block_size, block_stride).area() * getBlockHistogramSize();
 }
 
-size_t cv::gpu::HOGDescriptor::getBlockHistogramSize() const
+size_t cv::cuda::HOGDescriptor::getBlockHistogramSize() const
 {
     Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height);
     return (size_t)(nbins * cells_per_block.area());
 }
 
-double cv::gpu::HOGDescriptor::getWinSigma() const
+double cv::cuda::HOGDescriptor::getWinSigma() const
 {
     return win_sigma >= 0 ? win_sigma : (block_size.width + block_size.height) / 8.0;
 }
 
-bool cv::gpu::HOGDescriptor::checkDetectorSize() const
+bool cv::cuda::HOGDescriptor::checkDetectorSize() const
 {
     size_t detector_size = detector.rows * detector.cols;
     size_t descriptor_size = getDescriptorSize();
     return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1;
 }
 
-void cv::gpu::HOGDescriptor::setSVMDetector(const std::vector<float>& _detector)
+void cv::cuda::HOGDescriptor::setSVMDetector(const std::vector<float>& _detector)
 {
     std::vector<float> detector_reordered(_detector.size());
 
@@ -179,7 +179,7 @@ void cv::gpu::HOGDescriptor::setSVMDetector(const std::vector<float>& _detector)
     CV_Assert(checkDetectorSize());
 }
 
-cv::gpu::GpuMat cv::gpu::HOGDescriptor::getBuffer(const Size& sz, int type, GpuMat& buf)
+cv::cuda::GpuMat cv::cuda::HOGDescriptor::getBuffer(const Size& sz, int type, GpuMat& buf)
 {
     if (buf.empty() || buf.type() != type)
         buf.create(sz, type);
@@ -190,13 +190,13 @@ cv::gpu::GpuMat cv::gpu::HOGDescriptor::getBuffer(const Size& sz, int type, GpuM
     return buf(Rect(Point(0,0), sz));
 }
 
-cv::gpu::GpuMat cv::gpu::HOGDescriptor::getBuffer(int rows, int cols, int type, GpuMat& buf)
+cv::cuda::GpuMat cv::cuda::HOGDescriptor::getBuffer(int rows, int cols, int type, GpuMat& buf)
 {
     return getBuffer(Size(cols, rows), type, buf);
 }
 
 
-void cv::gpu::HOGDescriptor::computeGradient(const GpuMat& img, GpuMat& _grad, GpuMat& _qangle)
+void cv::cuda::HOGDescriptor::computeGradient(const GpuMat& img, GpuMat& _grad, GpuMat& _qangle)
 {
     CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
 
@@ -219,7 +219,7 @@ void cv::gpu::HOGDescriptor::computeGradient(const GpuMat& img, GpuMat& _grad, G
 }
 
 
-void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
+void cv::cuda::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
 {
     computeGradient(img, grad, qangle);
 
@@ -237,7 +237,7 @@ void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
 }
 
 
-void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, int descr_format)
+void cv::cuda::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, int descr_format)
 {
     CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
 
@@ -264,7 +264,7 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,
     }
 }
 
-void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
+void cv::cuda::HOGDescriptor::computeConfidence(const GpuMat& img, std::vector<Point>& hits, double hit_threshold,
                           Size win_stride, Size padding, std::vector<Point>& locations, std::vector<double>& confidences)
 {
   CV_Assert(padding == Size(0, 0));
@@ -307,7 +307,7 @@ void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat& img, std::vector<Po
     }
 }
 
-void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
+void cv::cuda::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, std::vector<Rect>& found_locations,
                             double hit_threshold, Size win_stride, Size padding,
                             std::vector<HOGConfidence> &conf_out, int group_threshold)
 {
@@ -359,7 +359,7 @@ void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, std:
 }
 
 
-void cv::gpu::HOGDescriptor::detect(const GpuMat& img, std::vector<Point>& hits, double hit_threshold, Size win_stride, Size padding)
+void cv::cuda::HOGDescriptor::detect(const GpuMat& img, std::vector<Point>& hits, double hit_threshold, Size win_stride, Size padding)
 {
     CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
     CV_Assert(padding == Size(0, 0));
@@ -396,7 +396,7 @@ void cv::gpu::HOGDescriptor::detect(const GpuMat& img, std::vector<Point>& hits,
 
 
 
-void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations, double hit_threshold,
+void cv::cuda::HOGDescriptor::detectMultiScale(const GpuMat& img, std::vector<Rect>& found_locations, double hit_threshold,
                                               Size win_stride, Size padding, double scale0, int group_threshold)
 {
 
@@ -450,22 +450,22 @@ void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat& img, std::vector<Rec
     groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);
 }
 
-int cv::gpu::HOGDescriptor::numPartsWithin(int size, int part_size, int stride)
+int cv::cuda::HOGDescriptor::numPartsWithin(int size, int part_size, int stride)
 {
     return (size - part_size + stride) / stride;
 }
 
-cv::Size cv::gpu::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride)
+cv::Size cv::cuda::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride)
 {
     return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height));
 }
 
-std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector()
+std::vector<float> cv::cuda::HOGDescriptor::getDefaultPeopleDetector()
 {
     return getPeopleDetector64x128();
 }
 
-std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector48x96()
+std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector48x96()
 {
     static const float detector[] = {
         0.294350f, -0.098796f, -0.129522f, 0.078753f, 0.387527f, 0.261529f,
@@ -805,7 +805,7 @@ std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector48x96()
 
 
 
-std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector64x128()
+std::vector<float> cv::cuda::HOGDescriptor::getPeopleDetector64x128()
 {
     static const float detector[] = {
        0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
diff --git a/modules/gpu/test/test_calib3d.cpp b/modules/gpu/test/test_calib3d.cpp
index 3ad19dcbea..12aaf6560b 100644
--- a/modules/gpu/test/test_calib3d.cpp
+++ b/modules/gpu/test/test_calib3d.cpp
@@ -49,15 +49,15 @@ using namespace cvtest;
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // transformPoints
 
-struct TransformPoints : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct TransformPoints : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -67,8 +67,8 @@ GPU_TEST_P(TransformPoints, Accuracy)
     cv::Mat rvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
     cv::Mat tvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::transformPoints(loadMat(src), rvec, tvec, dst);
+    cv::cuda::GpuMat dst;
+    cv::cuda::transformPoints(loadMat(src), rvec, tvec, dst);
 
     ASSERT_EQ(src.size(), dst.size());
     ASSERT_EQ(src.type(), dst.type());
@@ -97,15 +97,15 @@ INSTANTIATE_TEST_CASE_P(GPU_Calib3D, TransformPoints, ALL_DEVICES);
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // ProjectPoints
 
-struct ProjectPoints : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct ProjectPoints : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -120,8 +120,8 @@ GPU_TEST_P(ProjectPoints, Accuracy)
     camera_mat.at<float>(2, 0) = 0.f;
     camera_mat.at<float>(2, 1) = 0.f;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::projectPoints(loadMat(src), rvec, tvec, camera_mat, cv::Mat(), dst);
+    cv::cuda::GpuMat dst;
+    cv::cuda::projectPoints(loadMat(src), rvec, tvec, camera_mat, cv::Mat(), dst);
 
     ASSERT_EQ(1, dst.rows);
     ASSERT_EQ(MatType(CV_32FC2), MatType(dst.type()));
@@ -147,15 +147,15 @@ INSTANTIATE_TEST_CASE_P(GPU_Calib3D, ProjectPoints, ALL_DEVICES);
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // SolvePnPRansac
 
-struct SolvePnPRansac : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct SolvePnPRansac : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -177,7 +177,7 @@ GPU_TEST_P(SolvePnPRansac, Accuracy)
 
     cv::Mat rvec, tvec;
     std::vector<int> inliers;
-    cv::gpu::solvePnPRansac(object, cv::Mat(1, (int)image_vec.size(), CV_32FC2, &image_vec[0]),
+    cv::cuda::solvePnPRansac(object, cv::Mat(1, (int)image_vec.size(), CV_32FC2, &image_vec[0]),
                             camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)),
                             rvec, tvec, false, 200, 2.f, 100, &inliers);
 
diff --git a/modules/gpu/test/test_global_motion.cpp b/modules/gpu/test/test_global_motion.cpp
index a156f93107..4f6c8daa35 100644
--- a/modules/gpu/test/test_global_motion.cpp
+++ b/modules/gpu/test/test_global_motion.cpp
@@ -47,9 +47,9 @@
 using namespace std;
 using namespace cv;
 
-struct CompactPoints : testing::TestWithParam<gpu::DeviceInfo>
+struct CompactPoints : testing::TestWithParam<cuda::DeviceInfo>
 {
-    virtual void SetUp() { gpu::setDevice(GetParam().deviceID()); }
+    virtual void SetUp() { cuda::setDevice(GetParam().deviceID()); }
 };
 
 GPU_TEST_P(CompactPoints, CanCompactizeSmallInput)
@@ -69,8 +69,8 @@ GPU_TEST_P(CompactPoints, CanCompactizeSmallInput)
     mask.at<uchar>(0,1) = 0;
     mask.at<uchar>(0,2) = 1;
 
-    gpu::GpuMat dsrc0(src0), dsrc1(src1), dmask(mask);
-    gpu::compactPoints(dsrc0, dsrc1, dmask);
+    cuda::GpuMat dsrc0(src0), dsrc1(src1), dmask(mask);
+    cuda::compactPoints(dsrc0, dsrc1, dmask);
 
     dsrc0.download(src0);
     dsrc1.download(src1);
diff --git a/modules/gpu/test/test_gpumat.cpp b/modules/gpu/test/test_gpumat.cpp
index 6fd4d69b43..3b227c447b 100644
--- a/modules/gpu/test/test_gpumat.cpp
+++ b/modules/gpu/test/test_gpumat.cpp
@@ -49,9 +49,9 @@ using namespace cvtest;
 ////////////////////////////////////////////////////////////////////////////////
 // SetTo
 
-PARAM_TEST_CASE(SetTo, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
+PARAM_TEST_CASE(SetTo, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     bool useRoi;
@@ -63,7 +63,7 @@ PARAM_TEST_CASE(SetTo, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
         type = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -71,7 +71,7 @@ GPU_TEST_P(SetTo, Zero)
 {
     cv::Scalar zero = cv::Scalar::all(0);
 
-    cv::gpu::GpuMat mat = createMat(size, type, useRoi);
+    cv::cuda::GpuMat mat = createMat(size, type, useRoi);
     mat.setTo(zero);
 
     EXPECT_MAT_NEAR(cv::Mat::zeros(size, type), mat, 0.0);
@@ -81,11 +81,11 @@ GPU_TEST_P(SetTo, SameVal)
 {
     cv::Scalar val = cv::Scalar::all(randomDouble(0.0, 255.0));
 
-    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat mat = createMat(size, type, useRoi);
+            cv::cuda::GpuMat mat = createMat(size, type, useRoi);
             mat.setTo(val);
         }
         catch (const cv::Exception& e)
@@ -95,7 +95,7 @@ GPU_TEST_P(SetTo, SameVal)
     }
     else
     {
-        cv::gpu::GpuMat mat = createMat(size, type, useRoi);
+        cv::cuda::GpuMat mat = createMat(size, type, useRoi);
         mat.setTo(val);
 
         EXPECT_MAT_NEAR(cv::Mat(size, type, val), mat, 0.0);
@@ -106,11 +106,11 @@ GPU_TEST_P(SetTo, DifferentVal)
 {
     cv::Scalar val = randomScalar(0.0, 255.0);
 
-    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat mat = createMat(size, type, useRoi);
+            cv::cuda::GpuMat mat = createMat(size, type, useRoi);
             mat.setTo(val);
         }
         catch (const cv::Exception& e)
@@ -120,7 +120,7 @@ GPU_TEST_P(SetTo, DifferentVal)
     }
     else
     {
-        cv::gpu::GpuMat mat = createMat(size, type, useRoi);
+        cv::cuda::GpuMat mat = createMat(size, type, useRoi);
         mat.setTo(val);
 
         EXPECT_MAT_NEAR(cv::Mat(size, type, val), mat, 0.0);
@@ -133,11 +133,11 @@ GPU_TEST_P(SetTo, Masked)
     cv::Mat mat_gold = randomMat(size, type);
     cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
 
-    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat mat = createMat(size, type, useRoi);
+            cv::cuda::GpuMat mat = createMat(size, type, useRoi);
             mat.setTo(val, loadMat(mask));
         }
         catch (const cv::Exception& e)
@@ -147,7 +147,7 @@ GPU_TEST_P(SetTo, Masked)
     }
     else
     {
-        cv::gpu::GpuMat mat = loadMat(mat_gold, useRoi);
+        cv::cuda::GpuMat mat = loadMat(mat_gold, useRoi);
         mat.setTo(val, loadMat(mask, useRoi));
 
         mat_gold.setTo(val, mask);
@@ -165,9 +165,9 @@ INSTANTIATE_TEST_CASE_P(GPU_GpuMat, SetTo, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // CopyTo
 
-PARAM_TEST_CASE(CopyTo, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
+PARAM_TEST_CASE(CopyTo, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     bool useRoi;
@@ -180,7 +180,7 @@ PARAM_TEST_CASE(CopyTo, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
         type = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -188,8 +188,8 @@ GPU_TEST_P(CopyTo, WithOutMask)
 {
     cv::Mat src = randomMat(size, type);
 
-    cv::gpu::GpuMat d_src = loadMat(src, useRoi);
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::GpuMat d_src = loadMat(src, useRoi);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
     d_src.copyTo(dst);
 
     EXPECT_MAT_NEAR(src, dst, 0.0);
@@ -200,12 +200,12 @@ GPU_TEST_P(CopyTo, Masked)
     cv::Mat src = randomMat(size, type);
     cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
 
-    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat d_src = loadMat(src);
-            cv::gpu::GpuMat dst;
+            cv::cuda::GpuMat d_src = loadMat(src);
+            cv::cuda::GpuMat dst;
             d_src.copyTo(dst, loadMat(mask, useRoi));
         }
         catch (const cv::Exception& e)
@@ -215,8 +215,8 @@ GPU_TEST_P(CopyTo, Masked)
     }
     else
     {
-        cv::gpu::GpuMat d_src = loadMat(src, useRoi);
-        cv::gpu::GpuMat dst = loadMat(cv::Mat::zeros(size, type), useRoi);
+        cv::cuda::GpuMat d_src = loadMat(src, useRoi);
+        cv::cuda::GpuMat dst = loadMat(cv::Mat::zeros(size, type), useRoi);
         d_src.copyTo(dst, loadMat(mask, useRoi));
 
         cv::Mat dst_gold = cv::Mat::zeros(size, type);
@@ -235,9 +235,9 @@ INSTANTIATE_TEST_CASE_P(GPU_GpuMat, CopyTo, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // ConvertTo
 
-PARAM_TEST_CASE(ConvertTo, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth, UseRoi)
+PARAM_TEST_CASE(ConvertTo, cv::cuda::DeviceInfo, cv::Size, MatDepth, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth1;
     int depth2;
@@ -251,7 +251,7 @@ PARAM_TEST_CASE(ConvertTo, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth, Us
         depth2 = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -259,12 +259,12 @@ GPU_TEST_P(ConvertTo, WithOutScaling)
 {
     cv::Mat src = randomMat(size, depth1);
 
-    if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat d_src = loadMat(src);
-            cv::gpu::GpuMat dst;
+            cv::cuda::GpuMat d_src = loadMat(src);
+            cv::cuda::GpuMat dst;
             d_src.convertTo(dst, depth2);
         }
         catch (const cv::Exception& e)
@@ -274,8 +274,8 @@ GPU_TEST_P(ConvertTo, WithOutScaling)
     }
     else
     {
-        cv::gpu::GpuMat d_src = loadMat(src, useRoi);
-        cv::gpu::GpuMat dst = createMat(size, depth2, useRoi);
+        cv::cuda::GpuMat d_src = loadMat(src, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, depth2, useRoi);
         d_src.convertTo(dst, depth2);
 
         cv::Mat dst_gold;
@@ -291,12 +291,12 @@ GPU_TEST_P(ConvertTo, WithScaling)
     double a = randomDouble(0.0, 1.0);
     double b = randomDouble(-10.0, 10.0);
 
-    if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat d_src = loadMat(src);
-            cv::gpu::GpuMat dst;
+            cv::cuda::GpuMat d_src = loadMat(src);
+            cv::cuda::GpuMat dst;
             d_src.convertTo(dst, depth2, a, b);
         }
         catch (const cv::Exception& e)
@@ -306,8 +306,8 @@ GPU_TEST_P(ConvertTo, WithScaling)
     }
     else
     {
-        cv::gpu::GpuMat d_src = loadMat(src, useRoi);
-        cv::gpu::GpuMat dst = createMat(size, depth2, useRoi);
+        cv::cuda::GpuMat d_src = loadMat(src, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, depth2, useRoi);
         d_src.convertTo(dst, depth2, a, b);
 
         cv::Mat dst_gold;
@@ -327,29 +327,29 @@ INSTANTIATE_TEST_CASE_P(GPU_GpuMat, ConvertTo, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // ensureSizeIsEnough
 
-struct EnsureSizeIsEnough : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct EnsureSizeIsEnough : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
     virtual void SetUp()
     {
-        cv::gpu::DeviceInfo devInfo = GetParam();
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::DeviceInfo devInfo = GetParam();
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
 GPU_TEST_P(EnsureSizeIsEnough, BufferReuse)
 {
-    cv::gpu::GpuMat buffer(100, 100, CV_8U);
-    cv::gpu::GpuMat old = buffer;
+    cv::cuda::GpuMat buffer(100, 100, CV_8U);
+    cv::cuda::GpuMat old = buffer;
 
     // don't reallocate memory
-    cv::gpu::ensureSizeIsEnough(10, 20, CV_8U, buffer);
+    cv::cuda::ensureSizeIsEnough(10, 20, CV_8U, buffer);
     EXPECT_EQ(10, buffer.rows);
     EXPECT_EQ(20, buffer.cols);
     EXPECT_EQ(CV_8UC1, buffer.type());
     EXPECT_EQ(reinterpret_cast<intptr_t>(old.data), reinterpret_cast<intptr_t>(buffer.data));
 
     // don't reallocate memory
-    cv::gpu::ensureSizeIsEnough(20, 30, CV_8U, buffer);
+    cv::cuda::ensureSizeIsEnough(20, 30, CV_8U, buffer);
     EXPECT_EQ(20, buffer.rows);
     EXPECT_EQ(30, buffer.cols);
     EXPECT_EQ(CV_8UC1, buffer.type());
diff --git a/modules/gpu/test/test_labeling.cpp b/modules/gpu/test/test_labeling.cpp
index 4a1927c392..9a6b6fd6ff 100644
--- a/modules/gpu/test/test_labeling.cpp
+++ b/modules/gpu/test/test_labeling.cpp
@@ -151,14 +151,14 @@ namespace
     };
 }
 
-struct Labeling : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct Labeling : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GetParam();
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 
     cv::Mat loat_image()
@@ -179,15 +179,15 @@ GPU_TEST_P(Labeling, DISABLED_ConnectedComponents)
     GreedyLabeling host(image);
     host(host._labels);
 
-    cv::gpu::GpuMat mask;
+    cv::cuda::GpuMat mask;
     mask.create(image.rows, image.cols, CV_8UC1);
 
-    cv::gpu::GpuMat components;
+    cv::cuda::GpuMat components;
     components.create(image.rows, image.cols, CV_32SC1);
 
-    cv::gpu::connectivityMask(cv::gpu::GpuMat(image), mask, cv::Scalar::all(0), cv::Scalar::all(2));
+    cv::cuda::connectivityMask(cv::cuda::GpuMat(image), mask, cv::Scalar::all(0), cv::Scalar::all(2));
 
-    cv::gpu::labelComponents(mask, components);
+    cv::cuda::labelComponents(mask, components);
 
     host.checkCorrectness(cv::Mat(components));
 }
diff --git a/modules/gpu/test/test_objdetect.cpp b/modules/gpu/test/test_objdetect.cpp
index b91c5dcf4d..02b04d2f57 100644
--- a/modules/gpu/test/test_objdetect.cpp
+++ b/modules/gpu/test/test_objdetect.cpp
@@ -48,9 +48,9 @@ using namespace cvtest;
 
 //#define DUMP
 
-struct HOG : testing::TestWithParam<cv::gpu::DeviceInfo>, cv::gpu::HOGDescriptor
+struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescriptor
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
 #ifdef DUMP
     std::ofstream f;
@@ -68,7 +68,7 @@ struct HOG : testing::TestWithParam<cv::gpu::DeviceInfo>, cv::gpu::HOGDescriptor
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 
 #ifdef DUMP
@@ -127,7 +127,7 @@ struct HOG : testing::TestWithParam<cv::gpu::DeviceInfo>, cv::gpu::HOGDescriptor
     void testDetect(const cv::Mat& img)
     {
         gamma_correction = false;
-        setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
+        setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
 
         std::vector<cv::Point> locations;
 
@@ -212,10 +212,10 @@ GPU_TEST_P(HOG, GetDescriptors)
     cv::Mat img;
     cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
 
-    cv::gpu::GpuMat d_img(img);
+    cv::cuda::GpuMat d_img(img);
 
     // Convert train images into feature vectors (train table)
-    cv::gpu::GpuMat descriptors, descriptors_by_cols;
+    cv::cuda::GpuMat descriptors, descriptors_by_cols;
     getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
     getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);
 
@@ -251,38 +251,38 @@ GPU_TEST_P(HOG, GetDescriptors)
     img_rgb = readImage("hog/positive1.png");
     ASSERT_TRUE(!img_rgb.empty());
     cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::gpu::GpuMat(img));
+    computeBlockHistograms(cv::cuda::GpuMat(img));
     // Everything is fine with interpolation for left top subimage
     ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));
 
     img_rgb = readImage("hog/positive2.png");
     ASSERT_TRUE(!img_rgb.empty());
     cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::gpu::GpuMat(img));
+    computeBlockHistograms(cv::cuda::GpuMat(img));
     compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));
 
     img_rgb = readImage("hog/negative1.png");
     ASSERT_TRUE(!img_rgb.empty());
     cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::gpu::GpuMat(img));
+    computeBlockHistograms(cv::cuda::GpuMat(img));
     compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));
 
     img_rgb = readImage("hog/negative2.png");
     ASSERT_TRUE(!img_rgb.empty());
     cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::gpu::GpuMat(img));
+    computeBlockHistograms(cv::cuda::GpuMat(img));
     compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));
 
     img_rgb = readImage("hog/positive3.png");
     ASSERT_TRUE(!img_rgb.empty());
     cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::gpu::GpuMat(img));
+    computeBlockHistograms(cv::cuda::GpuMat(img));
     compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));
 
     img_rgb = readImage("hog/negative3.png");
     ASSERT_TRUE(!img_rgb.empty());
     cv::cvtColor(img_rgb, img, cv::COLOR_BGR2BGRA);
-    computeBlockHistograms(cv::gpu::GpuMat(img));
+    computeBlockHistograms(cv::cuda::GpuMat(img));
     compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
 }
 
@@ -290,15 +290,15 @@ INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, ALL_DEVICES);
 
 //============== caltech hog tests =====================//
 
-struct CalTech : public ::testing::TestWithParam<std::tr1::tuple<cv::gpu::DeviceInfo, std::string> >
+struct CalTech : public ::testing::TestWithParam<std::tr1::tuple<cv::cuda::DeviceInfo, std::string> >
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Mat img;
 
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         img = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
         ASSERT_FALSE(img.empty());
@@ -307,11 +307,11 @@ struct CalTech : public ::testing::TestWithParam<std::tr1::tuple<cv::gpu::Device
 
 GPU_TEST_P(CalTech, HOG)
 {
-    cv::gpu::GpuMat d_img(img);
+    cv::cuda::GpuMat d_img(img);
     cv::Mat markedImage(img.clone());
 
-    cv::gpu::HOGDescriptor d_hog;
-    d_hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
+    cv::cuda::HOGDescriptor d_hog;
+    d_hog.setSVMDetector(cv::cuda::HOGDescriptor::getDefaultPeopleDetector());
     d_hog.nlevels = d_hog.nlevels + 32;
 
     std::vector<cv::Rect> found_locations;
@@ -341,20 +341,20 @@ INSTANTIATE_TEST_CASE_P(detect, CalTech, testing::Combine(ALL_DEVICES,
 //////////////////////////////////////////////////////////////////////////////////////////
 /// LBP classifier
 
-PARAM_TEST_CASE(LBP_Read_classifier, cv::gpu::DeviceInfo, int)
+PARAM_TEST_CASE(LBP_Read_classifier, cv::cuda::DeviceInfo, int)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
 GPU_TEST_P(LBP_Read_classifier, Accuracy)
 {
-    cv::gpu::CascadeClassifier_GPU classifier;
+    cv::cuda::CascadeClassifier_GPU classifier;
     std::string classifierXmlPath = std::string(cvtest::TS::ptr()->get_data_path()) + "lbpcascade/lbpcascade_frontalface.xml";
     ASSERT_TRUE(classifier.load(classifierXmlPath));
 }
@@ -363,14 +363,14 @@ INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_Read_classifier,
                         testing::Combine(ALL_DEVICES, testing::Values<int>(0)));
 
 
-PARAM_TEST_CASE(LBP_classify, cv::gpu::DeviceInfo, int)
+PARAM_TEST_CASE(LBP_classify, cv::cuda::DeviceInfo, int)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -396,11 +396,11 @@ GPU_TEST_P(LBP_classify, Accuracy)
     for (; it != rects.end(); ++it)
         cv::rectangle(markedImage, *it, cv::Scalar(255, 0, 0));
 
-    cv::gpu::CascadeClassifier_GPU gpuClassifier;
+    cv::cuda::CascadeClassifier_GPU gpuClassifier;
     ASSERT_TRUE(gpuClassifier.load(classifierXmlPath));
 
-    cv::gpu::GpuMat gpu_rects;
-    cv::gpu::GpuMat tested(grey);
+    cv::cuda::GpuMat gpu_rects;
+    cv::cuda::GpuMat tested(grey);
     int count = gpuClassifier.detectMultiScale(tested, gpu_rects);
 
 #if defined (LOG_CASCADE_STATISTIC)
diff --git a/modules/gpu/test/test_opengl.cpp b/modules/gpu/test/test_opengl.cpp
index 61660814ce..e0956e28a4 100644
--- a/modules/gpu/test/test_opengl.cpp
+++ b/modules/gpu/test/test_opengl.cpp
@@ -104,7 +104,7 @@ GPU_TEST_P(Buffer, ConstructorFromMat)
 GPU_TEST_P(Buffer, ConstructorFromGpuMat)
 {
     cv::Mat gold = randomMat(size, type);
-    cv::gpu::GpuMat d_gold(gold);
+    cv::cuda::GpuMat d_gold(gold);
 
     cv::ogl::Buffer buf(d_gold, cv::ogl::Buffer::ARRAY_BUFFER);
 
@@ -152,7 +152,7 @@ GPU_TEST_P(Buffer, CopyFromMat)
 GPU_TEST_P(Buffer, CopyFromGpuMat)
 {
     cv::Mat gold = randomMat(size, type);
-    cv::gpu::GpuMat d_gold(gold);
+    cv::cuda::GpuMat d_gold(gold);
 
     cv::ogl::Buffer buf;
     buf.copyFrom(d_gold, cv::ogl::Buffer::ARRAY_BUFFER, true);
@@ -185,7 +185,7 @@ GPU_TEST_P(Buffer, CopyToGpuMat)
 
     cv::ogl::Buffer buf(gold, cv::ogl::Buffer::ARRAY_BUFFER, true);
 
-    cv::gpu::GpuMat dst;
+    cv::cuda::GpuMat dst;
     buf.copyTo(dst);
 
     EXPECT_MAT_NEAR(gold, dst, 0);
@@ -261,7 +261,7 @@ GPU_TEST_P(Buffer, MapDevice)
 
     cv::ogl::Buffer buf(gold, cv::ogl::Buffer::ARRAY_BUFFER, true);
 
-    cv::gpu::GpuMat dst = buf.mapDevice();
+    cv::cuda::GpuMat dst = buf.mapDevice();
 
     EXPECT_MAT_NEAR(gold, dst, 0);
 
@@ -335,7 +335,7 @@ GPU_TEST_P(Texture2D, ConstructorFromMat)
 GPU_TEST_P(Texture2D, ConstructorFromGpuMat)
 {
     cv::Mat gold = randomMat(size, type, 0, depth == CV_8U ? 255 : 1);
-    cv::gpu::GpuMat d_gold(gold);
+    cv::cuda::GpuMat d_gold(gold);
 
     cv::ogl::Texture2D tex(d_gold, true);
 
@@ -395,7 +395,7 @@ GPU_TEST_P(Texture2D, CopyFromMat)
 GPU_TEST_P(Texture2D, CopyFromGpuMat)
 {
     cv::Mat gold = randomMat(size, type, 0, depth == CV_8U ? 255 : 1);
-    cv::gpu::GpuMat d_gold(gold);
+    cv::cuda::GpuMat d_gold(gold);
 
     cv::ogl::Texture2D tex;
     tex.copyFrom(d_gold, true);
@@ -426,7 +426,7 @@ GPU_TEST_P(Texture2D, CopyToGpuMat)
 
     cv::ogl::Texture2D tex(gold, true);
 
-    cv::gpu::GpuMat dst;
+    cv::cuda::GpuMat dst;
     tex.copyTo(dst, depth);
 
     EXPECT_MAT_NEAR(gold, dst, 1e-2);
diff --git a/modules/gpu/test/test_stream.cpp b/modules/gpu/test/test_stream.cpp
index 4ce29db750..ae8282f0e3 100644
--- a/modules/gpu/test/test_stream.cpp
+++ b/modules/gpu/test/test_stream.cpp
@@ -50,20 +50,20 @@
 
 using namespace cvtest;
 
-struct Async : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct Async : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::CudaMem src;
-    cv::gpu::GpuMat d_src;
+    cv::cuda::CudaMem src;
+    cv::cuda::GpuMat d_src;
 
-    cv::gpu::CudaMem dst;
-    cv::gpu::GpuMat d_dst;
+    cv::cuda::CudaMem dst;
+    cv::cuda::GpuMat d_dst;
 
     virtual void SetUp()
     {
-        cv::gpu::DeviceInfo devInfo = GetParam();
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::DeviceInfo devInfo = GetParam();
+        cv::cuda::setDevice(devInfo.deviceID());
 
-        src = cv::gpu::CudaMem(cv::gpu::CudaMem::PAGE_LOCKED);
+        src = cv::cuda::CudaMem(cv::cuda::CudaMem::PAGE_LOCKED);
 
         cv::Mat m = randomMat(cv::Size(128, 128), CV_8UC1);
         m.copyTo(src);
@@ -76,8 +76,8 @@ void checkMemSet(int status, void* userData)
 
     Async* test = reinterpret_cast<Async*>(userData);
 
-    cv::gpu::CudaMem src = test->src;
-    cv::gpu::CudaMem dst = test->dst;
+    cv::cuda::CudaMem src = test->src;
+    cv::cuda::CudaMem dst = test->dst;
 
     cv::Mat dst_gold = cv::Mat::zeros(src.size(), src.type());
 
@@ -86,7 +86,7 @@ void checkMemSet(int status, void* userData)
 
 GPU_TEST_P(Async, MemSet)
 {
-    cv::gpu::Stream stream;
+    cv::cuda::Stream stream;
 
     d_dst.upload(src);
 
@@ -105,8 +105,8 @@ void checkConvert(int status, void* userData)
 
     Async* test = reinterpret_cast<Async*>(userData);
 
-    cv::gpu::CudaMem src = test->src;
-    cv::gpu::CudaMem dst = test->dst;
+    cv::cuda::CudaMem src = test->src;
+    cv::cuda::CudaMem dst = test->dst;
 
     cv::Mat dst_gold;
     src.createMatHeader().convertTo(dst_gold, CV_32S);
@@ -116,7 +116,7 @@ void checkConvert(int status, void* userData)
 
 GPU_TEST_P(Async, Convert)
 {
-    cv::gpu::Stream stream;
+    cv::cuda::Stream stream;
 
     d_src.upload(src, stream);
     d_src.convertTo(d_dst, CV_32S, stream);
diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
index dabb4e2ab0..33a3f16af6 100644
--- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp
+++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp
@@ -49,7 +49,7 @@
 
 #include "opencv2/core/gpu.hpp"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 //! adds one matrix to another (dst = src1 + src2)
 CV_EXPORTS void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null());
@@ -369,6 +369,6 @@ public:
 
 CV_EXPORTS Ptr<Convolution> createConvolution(Size user_block_size = Size());
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 #endif /* __OPENCV_GPUARITHM_HPP__ */
diff --git a/modules/gpuarithm/perf/perf_arithm.cpp b/modules/gpuarithm/perf/perf_arithm.cpp
index b18c8a8c0e..16917d017c 100644
--- a/modules/gpuarithm/perf/perf_arithm.cpp
+++ b/modules/gpuarithm/perf/perf_arithm.cpp
@@ -77,12 +77,12 @@ PERF_TEST_P(Sz_Type_Flags, GEMM,
     {
         declare.time(5.0);
 
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        const cv::gpu::GpuMat d_src3(src3);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        const cv::cuda::GpuMat d_src3(src3);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, dst, flags);
+        TEST_CYCLE() cv::cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, dst, flags);
 
         GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
     }
@@ -118,11 +118,11 @@ PERF_TEST_P(Sz_Flags, MulSpectrums,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_a(a);
-        const cv::gpu::GpuMat d_b(b);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_a(a);
+        const cv::cuda::GpuMat d_b(b);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::mulSpectrums(d_a, d_b, dst, flag);
+        TEST_CYCLE() cv::cuda::mulSpectrums(d_a, d_b, dst, flag);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -152,11 +152,11 @@ PERF_TEST_P(Sz, MulAndScaleSpectrums,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::mulAndScaleSpectrums(d_src1, d_src2, dst, cv::DFT_ROWS, scale, false);
+        TEST_CYCLE() cv::cuda::mulAndScaleSpectrums(d_src1, d_src2, dst, cv::DFT_ROWS, scale, false);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -183,10 +183,10 @@ PERF_TEST_P(Sz_Flags, Dft,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::dft(d_src, dst, size, flag);
+        TEST_CYCLE() cv::cuda::dft(d_src, dst, size, flag);
 
         GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
     }
@@ -222,15 +222,15 @@ PERF_TEST_P(Sz_KernelSz_Ccorr, Convolve,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_image = cv::gpu::createContinuous(size, CV_32FC1);
+        cv::cuda::GpuMat d_image = cv::cuda::createContinuous(size, CV_32FC1);
         d_image.upload(image);
 
-        cv::gpu::GpuMat d_templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1);
+        cv::cuda::GpuMat d_templ = cv::cuda::createContinuous(templ_size, templ_size, CV_32FC1);
         d_templ.upload(templ);
 
-        cv::Ptr<cv::gpu::Convolution> convolution = cv::gpu::createConvolution();
+        cv::Ptr<cv::cuda::Convolution> convolution = cv::cuda::createConvolution();
 
-        cv::gpu::GpuMat dst;
+        cv::cuda::GpuMat dst;
 
         TEST_CYCLE() convolution->convolve(d_image, d_templ, dst, ccorr);
 
@@ -262,11 +262,11 @@ PERF_TEST_P(Sz, Integral,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
+        cv::cuda::GpuMat d_buf;
 
-        TEST_CYCLE() cv::gpu::integral(d_src, dst, d_buf);
+        TEST_CYCLE() cv::cuda::integral(d_src, dst, d_buf);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -293,10 +293,10 @@ PERF_TEST_P(Sz, IntegralSqr,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst, buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst, buf;
 
-        TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst, buf);
+        TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst, buf);
 
         GPU_SANITY_CHECK(dst);
     }
diff --git a/modules/gpuarithm/perf/perf_core.cpp b/modules/gpuarithm/perf/perf_core.cpp
index 0add472ca3..12d8ff20cd 100644
--- a/modules/gpuarithm/perf/perf_core.cpp
+++ b/modules/gpuarithm/perf/perf_core.cpp
@@ -69,13 +69,13 @@ PERF_TEST_P(Sz_Depth_Cn, Merge,
 
     if (PERF_RUN_GPU())
     {
-        std::vector<cv::gpu::GpuMat> d_src(channels);
+        std::vector<cv::cuda::GpuMat> d_src(channels);
         for (int i = 0; i < channels; ++i)
             d_src[i].upload(src[i]);
 
-        cv::gpu::GpuMat dst;
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::merge(d_src, dst);
+        TEST_CYCLE() cv::cuda::merge(d_src, dst);
 
         GPU_SANITY_CHECK(dst, 1e-10);
     }
@@ -106,13 +106,13 @@ PERF_TEST_P(Sz_Depth_Cn, Split,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        std::vector<cv::gpu::GpuMat> dst;
+        const cv::cuda::GpuMat d_src(src);
+        std::vector<cv::cuda::GpuMat> dst;
 
-        TEST_CYCLE() cv::gpu::split(d_src, dst);
+        TEST_CYCLE() cv::cuda::split(d_src, dst);
 
-        const cv::gpu::GpuMat& dst0 = dst[0];
-        const cv::gpu::GpuMat& dst1 = dst[1];
+        const cv::cuda::GpuMat& dst0 = dst[0];
+        const cv::cuda::GpuMat& dst1 = dst[1];
 
         GPU_SANITY_CHECK(dst0, 1e-10);
         GPU_SANITY_CHECK(dst1, 1e-10);
@@ -146,10 +146,10 @@ PERF_TEST_P(Sz_Type, Transpose,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::transpose(d_src, dst);
+        TEST_CYCLE() cv::cuda::transpose(d_src, dst);
 
         GPU_SANITY_CHECK(dst, 1e-10);
     }
@@ -189,10 +189,10 @@ PERF_TEST_P(Sz_Depth_Cn_Code, Flip,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::flip(d_src, dst, flipCode);
+        TEST_CYCLE() cv::cuda::flip(d_src, dst, flipCode);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -224,10 +224,10 @@ PERF_TEST_P(Sz_Type, LutOneChannel,
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::gpu::LookUpTable> lutAlg = cv::gpu::createLookUpTable(lut);
+        cv::Ptr<cv::cuda::LookUpTable> lutAlg = cv::cuda::createLookUpTable(lut);
 
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
         TEST_CYCLE() lutAlg->transform(d_src, dst);
 
@@ -261,10 +261,10 @@ PERF_TEST_P(Sz_Type, LutMultiChannel,
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::gpu::LookUpTable> lutAlg = cv::gpu::createLookUpTable(lut);
+        cv::Ptr<cv::cuda::LookUpTable> lutAlg = cv::cuda::createLookUpTable(lut);
 
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
         TEST_CYCLE() lutAlg->transform(d_src, dst);
 
@@ -303,10 +303,10 @@ PERF_TEST_P(Sz_Depth_Cn_Border, CopyMakeBorder,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::copyMakeBorder(d_src, dst, 5, 5, 5, 5, borderMode);
+        TEST_CYCLE() cv::cuda::copyMakeBorder(d_src, dst, 5, 5, 5, 5, borderMode);
 
         GPU_SANITY_CHECK(dst);
     }
diff --git a/modules/gpuarithm/perf/perf_element_operations.cpp b/modules/gpuarithm/perf/perf_element_operations.cpp
index 1370da5804..bc2f24eb82 100644
--- a/modules/gpuarithm/perf/perf_element_operations.cpp
+++ b/modules/gpuarithm/perf/perf_element_operations.cpp
@@ -66,11 +66,11 @@ PERF_TEST_P(Sz_Depth, AddMat,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::add(d_src1, d_src2, dst);
+        TEST_CYCLE() cv::cuda::add(d_src1, d_src2, dst);
 
         GPU_SANITY_CHECK(dst, 1e-10);
     }
@@ -102,10 +102,10 @@ PERF_TEST_P(Sz_Depth, AddScalar,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::add(d_src, s, dst);
+        TEST_CYCLE() cv::cuda::add(d_src, s, dst);
 
         GPU_SANITY_CHECK(dst, 1e-10);
     }
@@ -137,11 +137,11 @@ PERF_TEST_P(Sz_Depth, SubtractMat,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::subtract(d_src1, d_src2, dst);
+        TEST_CYCLE() cv::cuda::subtract(d_src1, d_src2, dst);
 
         GPU_SANITY_CHECK(dst, 1e-10);
     }
@@ -173,10 +173,10 @@ PERF_TEST_P(Sz_Depth, SubtractScalar,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::subtract(d_src, s, dst);
+        TEST_CYCLE() cv::cuda::subtract(d_src, s, dst);
 
         GPU_SANITY_CHECK(dst, 1e-10);
     }
@@ -208,11 +208,11 @@ PERF_TEST_P(Sz_Depth, MultiplyMat,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::multiply(d_src1, d_src2, dst);
+        TEST_CYCLE() cv::cuda::multiply(d_src1, d_src2, dst);
 
         GPU_SANITY_CHECK(dst, 1e-6);
     }
@@ -244,10 +244,10 @@ PERF_TEST_P(Sz_Depth, MultiplyScalar,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::multiply(d_src, s, dst);
+        TEST_CYCLE() cv::cuda::multiply(d_src, s, dst);
 
         GPU_SANITY_CHECK(dst, 1e-6);
     }
@@ -279,11 +279,11 @@ PERF_TEST_P(Sz_Depth, DivideMat,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::divide(d_src1, d_src2, dst);
+        TEST_CYCLE() cv::cuda::divide(d_src1, d_src2, dst);
 
         GPU_SANITY_CHECK(dst, 1e-6);
     }
@@ -315,10 +315,10 @@ PERF_TEST_P(Sz_Depth, DivideScalar,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::divide(d_src, s, dst);
+        TEST_CYCLE() cv::cuda::divide(d_src, s, dst);
 
         GPU_SANITY_CHECK(dst, 1e-6);
     }
@@ -350,10 +350,10 @@ PERF_TEST_P(Sz_Depth, DivideScalarInv,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::divide(s[0], d_src, dst);
+        TEST_CYCLE() cv::cuda::divide(s[0], d_src, dst);
 
         GPU_SANITY_CHECK(dst, 1e-6);
     }
@@ -385,11 +385,11 @@ PERF_TEST_P(Sz_Depth, AbsDiffMat,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::absdiff(d_src1, d_src2, dst);
+        TEST_CYCLE() cv::cuda::absdiff(d_src1, d_src2, dst);
 
         GPU_SANITY_CHECK(dst, 1e-10);
     }
@@ -421,10 +421,10 @@ PERF_TEST_P(Sz_Depth, AbsDiffScalar,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::absdiff(d_src, s, dst);
+        TEST_CYCLE() cv::cuda::absdiff(d_src, s, dst);
 
         GPU_SANITY_CHECK(dst, 1e-10);
     }
@@ -453,10 +453,10 @@ PERF_TEST_P(Sz_Depth, Abs,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::abs(d_src, dst);
+        TEST_CYCLE() cv::cuda::abs(d_src, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -481,10 +481,10 @@ PERF_TEST_P(Sz_Depth, Sqr,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::sqr(d_src, dst);
+        TEST_CYCLE() cv::cuda::sqr(d_src, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -509,10 +509,10 @@ PERF_TEST_P(Sz_Depth, Sqrt,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::sqrt(d_src, dst);
+        TEST_CYCLE() cv::cuda::sqrt(d_src, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -541,10 +541,10 @@ PERF_TEST_P(Sz_Depth, Log,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::log(d_src, dst);
+        TEST_CYCLE() cv::cuda::log(d_src, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -573,10 +573,10 @@ PERF_TEST_P(Sz_Depth, Exp,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::exp(d_src, dst);
+        TEST_CYCLE() cv::cuda::exp(d_src, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -609,10 +609,10 @@ PERF_TEST_P(Sz_Depth_Power, Pow,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::pow(d_src, power, dst);
+        TEST_CYCLE() cv::cuda::pow(d_src, power, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -650,11 +650,11 @@ PERF_TEST_P(Sz_Depth_Code, CompareMat,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::compare(d_src1, d_src2, dst, cmp_code);
+        TEST_CYCLE() cv::cuda::compare(d_src1, d_src2, dst, cmp_code);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -688,10 +688,10 @@ PERF_TEST_P(Sz_Depth_Code, CompareScalar,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::compare(d_src, s, dst, cmp_code);
+        TEST_CYCLE() cv::cuda::compare(d_src, s, dst, cmp_code);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -720,10 +720,10 @@ PERF_TEST_P(Sz_Depth, BitwiseNot,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_not(d_src, dst);
+        TEST_CYCLE() cv::cuda::bitwise_not(d_src, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -755,11 +755,11 @@ PERF_TEST_P(Sz_Depth, BitwiseAndMat,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_and(d_src1, d_src2, dst);
+        TEST_CYCLE() cv::cuda::bitwise_and(d_src1, d_src2, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -796,10 +796,10 @@ PERF_TEST_P(Sz_Depth_Cn, BitwiseAndScalar,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_and(d_src, is, dst);
+        TEST_CYCLE() cv::cuda::bitwise_and(d_src, is, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -831,11 +831,11 @@ PERF_TEST_P(Sz_Depth, BitwiseOrMat,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_or(d_src1, d_src2, dst);
+        TEST_CYCLE() cv::cuda::bitwise_or(d_src1, d_src2, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -872,10 +872,10 @@ PERF_TEST_P(Sz_Depth_Cn, BitwiseOrScalar,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_or(d_src, is, dst);
+        TEST_CYCLE() cv::cuda::bitwise_or(d_src, is, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -907,11 +907,11 @@ PERF_TEST_P(Sz_Depth, BitwiseXorMat,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_xor(d_src1, d_src2, dst);
+        TEST_CYCLE() cv::cuda::bitwise_xor(d_src1, d_src2, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -948,10 +948,10 @@ PERF_TEST_P(Sz_Depth_Cn, BitwiseXorScalar,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bitwise_xor(d_src, is, dst);
+        TEST_CYCLE() cv::cuda::bitwise_xor(d_src, is, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -986,10 +986,10 @@ PERF_TEST_P(Sz_Depth_Cn, RShift,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::rshift(d_src, val, dst);
+        TEST_CYCLE() cv::cuda::rshift(d_src, val, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -1020,10 +1020,10 @@ PERF_TEST_P(Sz_Depth_Cn, LShift,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::lshift(d_src, val, dst);
+        TEST_CYCLE() cv::cuda::lshift(d_src, val, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -1051,11 +1051,11 @@ PERF_TEST_P(Sz_Depth, MinMat,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::min(d_src1, d_src2, dst);
+        TEST_CYCLE() cv::cuda::min(d_src1, d_src2, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -1087,10 +1087,10 @@ PERF_TEST_P(Sz_Depth, MinScalar,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::min(d_src, val[0], dst);
+        TEST_CYCLE() cv::cuda::min(d_src, val[0], dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -1122,11 +1122,11 @@ PERF_TEST_P(Sz_Depth, MaxMat,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::max(d_src1, d_src2, dst);
+        TEST_CYCLE() cv::cuda::max(d_src1, d_src2, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -1158,10 +1158,10 @@ PERF_TEST_P(Sz_Depth, MaxScalar,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::max(d_src, val[0], dst);
+        TEST_CYCLE() cv::cuda::max(d_src, val[0], dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -1199,11 +1199,11 @@ PERF_TEST_P(Sz_3Depth, AddWeighted,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::addWeighted(d_src1, 0.5, d_src2, 0.5, 10.0, dst, dst_depth);
+        TEST_CYCLE() cv::cuda::addWeighted(d_src1, 0.5, d_src2, 0.5, 10.0, dst, dst_depth);
 
         GPU_SANITY_CHECK(dst, 1e-10);
     }
@@ -1230,10 +1230,10 @@ PERF_TEST_P(Sz, MagnitudeComplex,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::magnitude(d_src, dst);
+        TEST_CYCLE() cv::cuda::magnitude(d_src, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -1263,10 +1263,10 @@ PERF_TEST_P(Sz, MagnitudeSqrComplex,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::magnitudeSqr(d_src, dst);
+        TEST_CYCLE() cv::cuda::magnitudeSqr(d_src, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -1292,11 +1292,11 @@ PERF_TEST_P(Sz, Magnitude,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::magnitude(d_src1, d_src2, dst);
+        TEST_CYCLE() cv::cuda::magnitude(d_src1, d_src2, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -1326,11 +1326,11 @@ PERF_TEST_P(Sz, MagnitudeSqr,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::magnitudeSqr(d_src1, d_src2, dst);
+        TEST_CYCLE() cv::cuda::magnitudeSqr(d_src1, d_src2, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -1360,11 +1360,11 @@ PERF_TEST_P(Sz_AngleInDegrees, Phase,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::phase(d_src1, d_src2, dst, angleInDegrees);
+        TEST_CYCLE() cv::cuda::phase(d_src1, d_src2, dst, angleInDegrees);
 
         GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
     }
@@ -1396,12 +1396,12 @@ PERF_TEST_P(Sz_AngleInDegrees, CartToPolar,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat magnitude;
-        cv::gpu::GpuMat angle;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat magnitude;
+        cv::cuda::GpuMat angle;
 
-        TEST_CYCLE() cv::gpu::cartToPolar(d_src1, d_src2, magnitude, angle, angleInDegrees);
+        TEST_CYCLE() cv::cuda::cartToPolar(d_src1, d_src2, magnitude, angle, angleInDegrees);
 
         GPU_SANITY_CHECK(magnitude);
         GPU_SANITY_CHECK(angle, 1e-6, ERROR_RELATIVE);
@@ -1436,12 +1436,12 @@ PERF_TEST_P(Sz_AngleInDegrees, PolarToCart,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_magnitude(magnitude);
-        const cv::gpu::GpuMat d_angle(angle);
-        cv::gpu::GpuMat x;
-        cv::gpu::GpuMat y;
+        const cv::cuda::GpuMat d_magnitude(magnitude);
+        const cv::cuda::GpuMat d_angle(angle);
+        cv::cuda::GpuMat x;
+        cv::cuda::GpuMat y;
 
-        TEST_CYCLE() cv::gpu::polarToCart(d_magnitude, d_angle, x, y, angleInDegrees);
+        TEST_CYCLE() cv::cuda::polarToCart(d_magnitude, d_angle, x, y, angleInDegrees);
 
         GPU_SANITY_CHECK(x);
         GPU_SANITY_CHECK(y);
@@ -1479,10 +1479,10 @@ PERF_TEST_P(Sz_Depth_Op, Threshold,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::threshold(d_src, dst, 100.0, 255.0, threshOp);
+        TEST_CYCLE() cv::cuda::threshold(d_src, dst, 100.0, 255.0, threshOp);
 
         GPU_SANITY_CHECK(dst, 1e-10);
     }
diff --git a/modules/gpuarithm/perf/perf_reductions.cpp b/modules/gpuarithm/perf/perf_reductions.cpp
index c541ce0e28..40d709e51f 100644
--- a/modules/gpuarithm/perf/perf_reductions.cpp
+++ b/modules/gpuarithm/perf/perf_reductions.cpp
@@ -68,11 +68,11 @@ PERF_TEST_P(Sz_Depth_Norm, Norm,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat d_buf;
         double gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src, normType, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::norm(d_src, normType, d_buf);
 
         SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
     }
@@ -106,12 +106,12 @@ PERF_TEST_P(Sz_Norm, NormDiff,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src1(src1);
-        const cv::gpu::GpuMat d_src2(src2);
-        cv::gpu::GpuMat d_buf;
+        const cv::cuda::GpuMat d_src1(src1);
+        const cv::cuda::GpuMat d_src2(src2);
+        cv::cuda::GpuMat d_buf;
         double gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src1, d_src2, d_buf, normType);
+        TEST_CYCLE() gpu_dst = cv::cuda::norm(d_src1, d_src2, d_buf, normType);
 
         SANITY_CHECK(gpu_dst);
 
@@ -145,11 +145,11 @@ PERF_TEST_P(Sz_Depth_Cn, Sum,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat d_buf;
         cv::Scalar gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::gpu::sum(d_src, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::sum(d_src, d_buf);
 
         SANITY_CHECK(gpu_dst, 1e-5, ERROR_RELATIVE);
     }
@@ -182,11 +182,11 @@ PERF_TEST_P(Sz_Depth_Cn, SumAbs,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat d_buf;
         cv::Scalar gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::gpu::absSum(d_src, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::absSum(d_src, d_buf);
 
         SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
     }
@@ -215,11 +215,11 @@ PERF_TEST_P(Sz_Depth_Cn, SumSqr,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat d_buf;
         cv::Scalar gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::gpu::sqrSum(d_src, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::sqrSum(d_src, d_buf);
 
         SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
     }
@@ -247,11 +247,11 @@ PERF_TEST_P(Sz_Depth, MinMax,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat d_buf;
         double gpu_minVal, gpu_maxVal;
 
-        TEST_CYCLE() cv::gpu::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::gpu::GpuMat(), d_buf);
+        TEST_CYCLE() cv::cuda::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::cuda::GpuMat(), d_buf);
 
         SANITY_CHECK(gpu_minVal, 1e-10);
         SANITY_CHECK(gpu_maxVal, 1e-10);
@@ -285,12 +285,12 @@ PERF_TEST_P(Sz_Depth, MinMaxLoc,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_valbuf, d_locbuf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat d_valbuf, d_locbuf;
         double gpu_minVal, gpu_maxVal;
         cv::Point gpu_minLoc, gpu_maxLoc;
 
-        TEST_CYCLE() cv::gpu::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc, cv::gpu::GpuMat(), d_valbuf, d_locbuf);
+        TEST_CYCLE() cv::cuda::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc, cv::cuda::GpuMat(), d_valbuf, d_locbuf);
 
         SANITY_CHECK(gpu_minVal, 1e-10);
         SANITY_CHECK(gpu_maxVal, 1e-10);
@@ -322,11 +322,11 @@ PERF_TEST_P(Sz_Depth, CountNonZero,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat d_buf;
         int gpu_dst = 0;
 
-        TEST_CYCLE() gpu_dst = cv::gpu::countNonZero(d_src, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::countNonZero(d_src, d_buf);
 
         SANITY_CHECK(gpu_dst);
     }
@@ -370,10 +370,10 @@ PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Reduce,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::reduce(d_src, dst, dim, reduceOp);
+        TEST_CYCLE() cv::cuda::reduce(d_src, dst, dim, reduceOp);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -412,11 +412,11 @@ PERF_TEST_P(Sz_Depth_NormType, Normalize,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_norm_buf, d_cvt_buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
+        cv::cuda::GpuMat d_norm_buf, d_cvt_buf;
 
-        TEST_CYCLE() cv::gpu::normalize(d_src, dst, alpha, beta, norm_type, type, cv::gpu::GpuMat(), d_norm_buf, d_cvt_buf);
+        TEST_CYCLE() cv::cuda::normalize(d_src, dst, alpha, beta, norm_type, type, cv::cuda::GpuMat(), d_norm_buf, d_cvt_buf);
 
         GPU_SANITY_CHECK(dst, 1e-6);
     }
@@ -444,12 +444,12 @@ PERF_TEST_P(Sz, MeanStdDev,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat d_buf;
         cv::Scalar gpu_mean;
         cv::Scalar gpu_stddev;
 
-        TEST_CYCLE() cv::gpu::meanStdDev(d_src, gpu_mean, gpu_stddev, d_buf);
+        TEST_CYCLE() cv::cuda::meanStdDev(d_src, gpu_mean, gpu_stddev, d_buf);
 
         SANITY_CHECK(gpu_mean);
         SANITY_CHECK(gpu_stddev);
diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp
index eb7d710e6e..7206e58ec2 100644
--- a/modules/gpuarithm/src/arithm.cpp
+++ b/modules/gpuarithm/src/arithm.cpp
@@ -43,18 +43,18 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::gemm(InputArray, InputArray, double, InputArray, double, OutputArray, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::gemm(InputArray, InputArray, double, InputArray, double, OutputArray, int, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::mulSpectrums(InputArray, InputArray, OutputArray, int, bool, Stream&) { throw_no_cuda(); }
-void cv::gpu::mulAndScaleSpectrums(InputArray, InputArray, OutputArray, int, float, bool, Stream&) { throw_no_cuda(); }
+void cv::cuda::mulSpectrums(InputArray, InputArray, OutputArray, int, bool, Stream&) { throw_no_cuda(); }
+void cv::cuda::mulAndScaleSpectrums(InputArray, InputArray, OutputArray, int, float, bool, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::dft(InputArray, OutputArray, Size, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::dft(InputArray, OutputArray, Size, int, Stream&) { throw_no_cuda(); }
 
-Ptr<Convolution> cv::gpu::createConvolution(Size) { throw_no_cuda(); return Ptr<Convolution>(); }
+Ptr<Convolution> cv::cuda::createConvolution(Size) { throw_no_cuda(); return Ptr<Convolution>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -162,7 +162,7 @@ namespace
 ////////////////////////////////////////////////////////////////////////
 // gemm
 
-void cv::gpu::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray _src3, double beta, OutputArray _dst, int flags, Stream& stream)
+void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray _src3, double beta, OutputArray _dst, int flags, Stream& stream)
 {
 #ifndef HAVE_CUBLAS
     (void) _src1;
@@ -221,7 +221,7 @@ void cv::gpu::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
         {
             if (tr3)
             {
-                gpu::transpose(src3, dst, stream);
+                cuda::transpose(src3, dst, stream);
             }
             else
             {
@@ -297,7 +297,7 @@ void cv::gpu::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
 
 #ifdef HAVE_CUFFT
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
 
@@ -306,7 +306,7 @@ namespace cv { namespace gpu { namespace cudev
 
 #endif
 
-void cv::gpu::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, bool conjB, Stream& stream)
+void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, bool conjB, Stream& stream)
 {
 #ifndef HAVE_CUFFT
     (void) _src1;
@@ -341,7 +341,7 @@ void cv::gpu::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst,
 
 #ifdef HAVE_CUFFT
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
 
@@ -350,7 +350,7 @@ namespace cv { namespace gpu { namespace cudev
 
 #endif
 
-void cv::gpu::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
+void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
 {
 #ifndef HAVE_CUFFT
     (void) _src1;
@@ -384,7 +384,7 @@ void cv::gpu::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArr
 //////////////////////////////////////////////////////////////////////////////
 // dft
 
-void cv::gpu::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream)
+void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream)
 {
 #ifndef HAVE_CUFFT
     (void) _src;
@@ -478,7 +478,7 @@ void cv::gpu::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, S
     cufftSafeCall( cufftDestroy(plan) );
 
     if (is_scaled_dft)
-        gpu::multiply(_dst, Scalar::all(1. / dft_size.area()), _dst, 1, -1, stream);
+        cuda::multiply(_dst, Scalar::all(1. / dft_size.area()), _dst, 1, -1, stream);
 
 #endif
 }
@@ -580,7 +580,7 @@ namespace
         cufftSafeCall( cufftSetStream(planC2R, stream) );
 
         GpuMat templ_roi(templ.size(), CV_32FC1, templ.data, templ.step);
-        gpu::copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
+        cuda::copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
                             templ_block.cols - templ_roi.cols, 0, Scalar(), _stream);
 
         cufftSafeCall( cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(), templ_spect.ptr<cufftComplex>()) );
@@ -594,12 +594,12 @@ namespace
                                     std::min(y + dft_size.height, image.rows) - y);
                 GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x),
                                  image.step);
-                gpu::copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
+                cuda::copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
                                     0, image_block.cols - image_roi.cols, 0, Scalar(), _stream);
 
                 cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(),
                                            image_spect.ptr<cufftComplex>()));
-                gpu::mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,
+                cuda::mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,
                                           1.f / dft_size.area(), ccorr, _stream);
                 cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(),
                                            result_data.ptr<cufftReal>()));
@@ -622,7 +622,7 @@ namespace
 
 #endif
 
-Ptr<Convolution> cv::gpu::createConvolution(Size user_block_size)
+Ptr<Convolution> cv::cuda::createConvolution(Size user_block_size)
 {
 #ifndef HAVE_CUFFT
     (void) user_block_size;
diff --git a/modules/gpuarithm/src/core.cpp b/modules/gpuarithm/src/core.cpp
index 22887796ab..e611b36dfc 100644
--- a/modules/gpuarithm/src/core.cpp
+++ b/modules/gpuarithm/src/core.cpp
@@ -43,30 +43,30 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::merge(const GpuMat*, size_t, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::merge(const std::vector<GpuMat>&, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::merge(const GpuMat*, size_t, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::merge(const std::vector<GpuMat>&, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::split(InputArray, GpuMat*, Stream&) { throw_no_cuda(); }
-void cv::gpu::split(InputArray, std::vector<GpuMat>&, Stream&) { throw_no_cuda(); }
+void cv::cuda::split(InputArray, GpuMat*, Stream&) { throw_no_cuda(); }
+void cv::cuda::split(InputArray, std::vector<GpuMat>&, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::transpose(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::transpose(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::flip(InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::flip(InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
 
-Ptr<LookUpTable> cv::gpu::createLookUpTable(InputArray) { throw_no_cuda(); return Ptr<LookUpTable>(); }
+Ptr<LookUpTable> cv::cuda::createLookUpTable(InputArray) { throw_no_cuda(); return Ptr<LookUpTable>(); }
 
-void cv::gpu::copyMakeBorder(InputArray, OutputArray, int, int, int, int, int, Scalar, Stream&) { throw_no_cuda(); }
+void cv::cuda::copyMakeBorder(InputArray, OutputArray, int, int, int, int, int, Scalar, Stream&) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
 ////////////////////////////////////////////////////////////////////////
 // merge/split
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace split_merge
     {
@@ -112,7 +112,7 @@ namespace
                 src_as_devmem[i] = src[i];
 
             PtrStepSzb dst_as_devmem(dst);
-            cv::gpu::cudev::split_merge::merge(src_as_devmem, dst_as_devmem, (int)n, CV_ELEM_SIZE(depth), StreamAccessor::getStream(stream));
+            cv::cuda::cudev::split_merge::merge(src_as_devmem, dst_as_devmem, (int)n, CV_ELEM_SIZE(depth), StreamAccessor::getStream(stream));
         }
     }
 
@@ -145,28 +145,28 @@ namespace
             dst_as_devmem[i] = dst[i];
 
         PtrStepSzb src_as_devmem(src);
-        cv::gpu::cudev::split_merge::split(src_as_devmem, dst_as_devmem, num_channels, src.elemSize1(), StreamAccessor::getStream(stream));
+        cv::cuda::cudev::split_merge::split(src_as_devmem, dst_as_devmem, num_channels, src.elemSize1(), StreamAccessor::getStream(stream));
     }
 }
 
-void cv::gpu::merge(const GpuMat* src, size_t n, OutputArray dst, Stream& stream)
+void cv::cuda::merge(const GpuMat* src, size_t n, OutputArray dst, Stream& stream)
 {
     merge_caller(src, n, dst, stream);
 }
 
 
-void cv::gpu::merge(const std::vector<GpuMat>& src, OutputArray dst, Stream& stream)
+void cv::cuda::merge(const std::vector<GpuMat>& src, OutputArray dst, Stream& stream)
 {
     merge_caller(&src[0], src.size(), dst, stream);
 }
 
-void cv::gpu::split(InputArray _src, GpuMat* dst, Stream& stream)
+void cv::cuda::split(InputArray _src, GpuMat* dst, Stream& stream)
 {
     GpuMat src = _src.getGpuMat();
     split_caller(src, dst, stream);
 }
 
-void cv::gpu::split(InputArray _src, std::vector<GpuMat>& dst, Stream& stream)
+void cv::cuda::split(InputArray _src, std::vector<GpuMat>& dst, Stream& stream)
 {
     GpuMat src = _src.getGpuMat();
     dst.resize(src.channels());
@@ -182,7 +182,7 @@ namespace arithm
     template <typename T> void transpose(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream);
 }
 
-void cv::gpu::transpose(InputArray _src, OutputArray _dst, Stream& _stream)
+void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& _stream)
 {
     GpuMat src = _src.getGpuMat();
 
@@ -263,7 +263,7 @@ namespace
     };
 }
 
-void cv::gpu::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& stream)
+void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream);
     static const func_t funcs[6][4] =
@@ -349,7 +349,7 @@ namespace
         }
         else
         {
-            gpu::split(d_nppLut, d_nppLut3);
+            cuda::split(d_nppLut, d_nppLut3);
 
             pValues3[0] = d_nppLut3[0].ptr<Npp32s>();
             pValues3[1] = d_nppLut3[1].ptr<Npp32s>();
@@ -495,7 +495,7 @@ namespace
 
 #endif //  (CUDA_VERSION >= 5000)
 
-Ptr<LookUpTable> cv::gpu::createLookUpTable(InputArray lut)
+Ptr<LookUpTable> cv::cuda::createLookUpTable(InputArray lut)
 {
     return new LookUpTableImpl(lut);
 }
@@ -503,7 +503,7 @@ Ptr<LookUpTable> cv::gpu::createLookUpTable(InputArray lut)
 ////////////////////////////////////////////////////////////////////////
 // copyMakeBorder
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -515,7 +515,7 @@ namespace
 {
     template <typename T, int cn> void copyMakeBorder_caller(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
     {
-        using namespace ::cv::gpu::cudev::imgproc;
+        using namespace ::cv::cuda::cudev::imgproc;
 
         Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));
 
@@ -529,7 +529,7 @@ typedef Npp32s __attribute__((__may_alias__)) Npp32s_a;
 typedef Npp32s Npp32s_a;
 #endif
 
-void cv::gpu::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bottom, int left, int right, int borderType, Scalar value, Stream& _stream)
+void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bottom, int left, int right, int borderType, Scalar value, Stream& _stream)
 {
     GpuMat src = _src.getGpuMat();
 
diff --git a/modules/gpuarithm/src/cuda/absdiff_mat.cu b/modules/gpuarithm/src/cuda/absdiff_mat.cu
index d47068ee03..bdbb2c2f6c 100644
--- a/modules/gpuarithm/src/cuda/absdiff_mat.cu
+++ b/modules/gpuarithm/src/cuda/absdiff_mat.cu
@@ -50,8 +50,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -102,7 +102,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <> struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
     {
diff --git a/modules/gpuarithm/src/cuda/absdiff_scalar.cu b/modules/gpuarithm/src/cuda/absdiff_scalar.cu
index 5a89791f86..f2b6ad0972 100644
--- a/modules/gpuarithm/src/cuda/absdiff_scalar.cu
+++ b/modules/gpuarithm/src/cuda/absdiff_scalar.cu
@@ -50,8 +50,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -69,7 +69,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T, typename S> struct TransformFunctorTraits< arithm::AbsDiffScalar<T, S> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
     {
diff --git a/modules/gpuarithm/src/cuda/add_mat.cu b/modules/gpuarithm/src/cuda/add_mat.cu
index 3b1bc1f385..d85ab9f5ff 100644
--- a/modules/gpuarithm/src/cuda/add_mat.cu
+++ b/modules/gpuarithm/src/cuda/add_mat.cu
@@ -50,8 +50,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -89,7 +89,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <> struct TransformFunctorTraits< arithm::VAdd4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
     {
diff --git a/modules/gpuarithm/src/cuda/add_scalar.cu b/modules/gpuarithm/src/cuda/add_scalar.cu
index 3362c2b930..1065ac3f70 100644
--- a/modules/gpuarithm/src/cuda/add_scalar.cu
+++ b/modules/gpuarithm/src/cuda/add_scalar.cu
@@ -50,8 +50,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -68,7 +68,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::AddScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
     {
diff --git a/modules/gpuarithm/src/cuda/add_weighted.cu b/modules/gpuarithm/src/cuda/add_weighted.cu
index bf632d68f2..ce6a13784d 100644
--- a/modules/gpuarithm/src/cuda/add_weighted.cu
+++ b/modules/gpuarithm/src/cuda/add_weighted.cu
@@ -49,8 +49,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -100,7 +100,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T1, typename T2, typename D, size_t src1_size, size_t src2_size, size_t dst_size> struct AddWeightedTraits : DefaultTransformFunctorTraits< arithm::AddWeighted<T1, T2, D> >
     {
diff --git a/modules/gpuarithm/src/cuda/bitwise_mat.cu b/modules/gpuarithm/src/cuda/bitwise_mat.cu
index 40222214ea..e16b5d9e7f 100644
--- a/modules/gpuarithm/src/cuda/bitwise_mat.cu
+++ b/modules/gpuarithm/src/cuda/bitwise_mat.cu
@@ -50,10 +50,10 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T> struct TransformFunctorTraits< bit_not<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
     {
diff --git a/modules/gpuarithm/src/cuda/bitwise_scalar.cu b/modules/gpuarithm/src/cuda/bitwise_scalar.cu
index 145885024a..411133d87d 100644
--- a/modules/gpuarithm/src/cuda/bitwise_scalar.cu
+++ b/modules/gpuarithm/src/cuda/bitwise_scalar.cu
@@ -50,10 +50,10 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
     {
@@ -72,17 +72,17 @@ namespace arithm
 {
     template <typename T> void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
     {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_and<T>(), src2), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::cudev::bind2nd(bit_and<T>(), src2), WithOutMask(), stream);
     }
 
     template <typename T> void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
     {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_or<T>(), src2), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::cudev::bind2nd(bit_or<T>(), src2), WithOutMask(), stream);
     }
 
     template <typename T> void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
     {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_xor<T>(), src2), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::cudev::bind2nd(bit_xor<T>(), src2), WithOutMask(), stream);
     }
 
     template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
diff --git a/modules/gpuarithm/src/cuda/cmp_mat.cu b/modules/gpuarithm/src/cuda/cmp_mat.cu
index 6602edf62f..281831a4e1 100644
--- a/modules/gpuarithm/src/cuda/cmp_mat.cu
+++ b/modules/gpuarithm/src/cuda/cmp_mat.cu
@@ -50,8 +50,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -107,7 +107,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <> struct TransformFunctorTraits< arithm::VCmpEq4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
     {
diff --git a/modules/gpuarithm/src/cuda/cmp_scalar.cu b/modules/gpuarithm/src/cuda/cmp_scalar.cu
index 678f253ea8..58eb83ed04 100644
--- a/modules/gpuarithm/src/cuda/cmp_scalar.cu
+++ b/modules/gpuarithm/src/cuda/cmp_scalar.cu
@@ -51,8 +51,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -125,7 +125,7 @@ namespace arithm
 #undef TYPE_VEC
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <class Op, typename T> struct TransformFunctorTraits< arithm::CmpScalar<Op, T, 1> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
     {
diff --git a/modules/gpuarithm/src/cuda/copy_make_border.cu b/modules/gpuarithm/src/cuda/copy_make_border.cu
index d772e09ede..f7d93feec5 100644
--- a/modules/gpuarithm/src/cuda/copy_make_border.cu
+++ b/modules/gpuarithm/src/cuda/copy_make_border.cu
@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -126,6 +126,6 @@ namespace cv { namespace gpu { namespace cudev
         template void copyMakeBorder_gpu<float, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
         template void copyMakeBorder_gpu<float, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
     } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuarithm/src/cuda/countnonzero.cu b/modules/gpuarithm/src/cuda/countnonzero.cu
index beab82b4b0..48b201fdb1 100644
--- a/modules/gpuarithm/src/cuda/countnonzero.cu
+++ b/modules/gpuarithm/src/cuda/countnonzero.cu
@@ -49,8 +49,8 @@
 #include "opencv2/core/cuda/reduce.hpp"
 #include "opencv2/core/cuda/emulation.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace countNonZero
 {
diff --git a/modules/gpuarithm/src/cuda/div_mat.cu b/modules/gpuarithm/src/cuda/div_mat.cu
index aab6638900..604b63649a 100644
--- a/modules/gpuarithm/src/cuda/div_mat.cu
+++ b/modules/gpuarithm/src/cuda/div_mat.cu
@@ -50,8 +50,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -128,7 +128,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <> struct TransformFunctorTraits<arithm::Div_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
     {
diff --git a/modules/gpuarithm/src/cuda/div_scalar.cu b/modules/gpuarithm/src/cuda/div_scalar.cu
index 464c4adf87..1fa13b4a35 100644
--- a/modules/gpuarithm/src/cuda/div_scalar.cu
+++ b/modules/gpuarithm/src/cuda/div_scalar.cu
@@ -50,8 +50,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -80,7 +80,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
     {
diff --git a/modules/gpuarithm/src/cuda/integral.cu b/modules/gpuarithm/src/cuda/integral.cu
index d8276b2c9a..234b7cd329 100644
--- a/modules/gpuarithm/src/cuda/integral.cu
+++ b/modules/gpuarithm/src/cuda/integral.cu
@@ -44,7 +44,7 @@
 
 #include "opencv2/core/cuda/common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
diff --git a/modules/gpuarithm/src/cuda/math.cu b/modules/gpuarithm/src/cuda/math.cu
index 1f2e7b8a14..7651057255 100644
--- a/modules/gpuarithm/src/cuda/math.cu
+++ b/modules/gpuarithm/src/cuda/math.cu
@@ -52,13 +52,13 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 //////////////////////////////////////////////////////////////////////////
 // absMat
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T> struct TransformFunctorTraits< abs_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
     {
@@ -99,7 +99,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T> struct TransformFunctorTraits< arithm::Sqr<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
     {
@@ -126,7 +126,7 @@ namespace arithm
 //////////////////////////////////////////////////////////////////////////
 // sqrtMat
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T> struct TransformFunctorTraits< sqrt_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
     {
@@ -153,7 +153,7 @@ namespace arithm
 //////////////////////////////////////////////////////////////////////////
 // logMat
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T> struct TransformFunctorTraits< log_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
     {
@@ -195,7 +195,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T> struct TransformFunctorTraits< arithm::Exp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
     {
@@ -275,7 +275,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T> struct TransformFunctorTraits< arithm::PowOp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
     {
diff --git a/modules/gpuarithm/src/cuda/minmax.cu b/modules/gpuarithm/src/cuda/minmax.cu
index 1bdeb7ddd6..249c21b87c 100644
--- a/modules/gpuarithm/src/cuda/minmax.cu
+++ b/modules/gpuarithm/src/cuda/minmax.cu
@@ -51,8 +51,8 @@
 #include "opencv2/core/cuda/limits.hpp"
 #include "opencv2/core/cuda/utility.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace minMax
 {
diff --git a/modules/gpuarithm/src/cuda/minmax_mat.cu b/modules/gpuarithm/src/cuda/minmax_mat.cu
index 0bf5a468d9..89db8fd14e 100644
--- a/modules/gpuarithm/src/cuda/minmax_mat.cu
+++ b/modules/gpuarithm/src/cuda/minmax_mat.cu
@@ -50,8 +50,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 //////////////////////////////////////////////////////////////////////////
 // min
@@ -81,7 +81,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <> struct TransformFunctorTraits< arithm::VMin4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
     {
@@ -127,7 +127,7 @@ namespace arithm
 
     template <typename T> void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
     {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::cudev::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
     }
 
     template void minScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
@@ -167,7 +167,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <> struct TransformFunctorTraits< arithm::VMax4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
     {
@@ -213,7 +213,7 @@ namespace arithm
 
     template <typename T> void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
     {
-        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
+        cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::cuda::cudev::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
     }
 
     template void maxScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
diff --git a/modules/gpuarithm/src/cuda/minmaxloc.cu b/modules/gpuarithm/src/cuda/minmaxloc.cu
index fbd7029861..8648a3f4d4 100644
--- a/modules/gpuarithm/src/cuda/minmaxloc.cu
+++ b/modules/gpuarithm/src/cuda/minmaxloc.cu
@@ -51,8 +51,8 @@
 #include "opencv2/core/cuda/limits.hpp"
 #include "opencv2/core/cuda/utility.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace minMaxLoc
 {
diff --git a/modules/gpuarithm/src/cuda/mul_mat.cu b/modules/gpuarithm/src/cuda/mul_mat.cu
index 25bc8a5970..500e98de88 100644
--- a/modules/gpuarithm/src/cuda/mul_mat.cu
+++ b/modules/gpuarithm/src/cuda/mul_mat.cu
@@ -50,8 +50,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -109,7 +109,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <> struct TransformFunctorTraits<arithm::Mul_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
     {
diff --git a/modules/gpuarithm/src/cuda/mul_scalar.cu b/modules/gpuarithm/src/cuda/mul_scalar.cu
index 6546550275..579db188b3 100644
--- a/modules/gpuarithm/src/cuda/mul_scalar.cu
+++ b/modules/gpuarithm/src/cuda/mul_scalar.cu
@@ -50,8 +50,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -68,7 +68,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
     {
diff --git a/modules/gpuarithm/src/cuda/mul_spectrums.cu b/modules/gpuarithm/src/cuda/mul_spectrums.cu
index 1b58b8ca7f..e62768a9df 100644
--- a/modules/gpuarithm/src/cuda/mul_spectrums.cu
+++ b/modules/gpuarithm/src/cuda/mul_spectrums.cu
@@ -50,7 +50,7 @@
 
 #include "opencv2/core/cuda/common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     //////////////////////////////////////////////////////////////////////////
     // mulSpectrums
@@ -164,7 +164,7 @@ namespace cv { namespace gpu { namespace cudev
         if (stream == 0)
             cudaSafeCall( cudaDeviceSynchronize() );
     }
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // HAVE_CUFFT
 
diff --git a/modules/gpuarithm/src/cuda/polar_cart.cu b/modules/gpuarithm/src/cuda/polar_cart.cu
index 88626c5d2e..a39ccf999e 100644
--- a/modules/gpuarithm/src/cuda/polar_cart.cu
+++ b/modules/gpuarithm/src/cuda/polar_cart.cu
@@ -44,7 +44,7 @@
 
 #include "opencv2/core/cuda/common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace mathfunc
     {
@@ -212,6 +212,6 @@ namespace cv { namespace gpu { namespace cudev
             callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
         }
     } // namespace mathfunc
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuarithm/src/cuda/reduce.cu b/modules/gpuarithm/src/cuda/reduce.cu
index 8588a3b234..3f6c242c32 100644
--- a/modules/gpuarithm/src/cuda/reduce.cu
+++ b/modules/gpuarithm/src/cuda/reduce.cu
@@ -52,8 +52,8 @@
 
 #include "unroll_detail.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace reduce
 {
diff --git a/modules/gpuarithm/src/cuda/split_merge.cu b/modules/gpuarithm/src/cuda/split_merge.cu
index 388441c634..c8c574f72a 100644
--- a/modules/gpuarithm/src/cuda/split_merge.cu
+++ b/modules/gpuarithm/src/cuda/split_merge.cu
@@ -44,7 +44,7 @@
 
 #include "opencv2/core/cuda/common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace split_merge
     {
@@ -505,7 +505,7 @@ namespace cv { namespace gpu { namespace cudev
             split_func(src, dst, stream);
         }
     } // namespace split_merge
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuarithm/src/cuda/sub_mat.cu b/modules/gpuarithm/src/cuda/sub_mat.cu
index 077eafb356..baafdc2101 100644
--- a/modules/gpuarithm/src/cuda/sub_mat.cu
+++ b/modules/gpuarithm/src/cuda/sub_mat.cu
@@ -50,8 +50,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -89,7 +89,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <> struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
     {
diff --git a/modules/gpuarithm/src/cuda/sub_scalar.cu b/modules/gpuarithm/src/cuda/sub_scalar.cu
index 619ab4310f..5940423775 100644
--- a/modules/gpuarithm/src/cuda/sub_scalar.cu
+++ b/modules/gpuarithm/src/cuda/sub_scalar.cu
@@ -50,8 +50,8 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
@@ -69,7 +69,7 @@ namespace arithm
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::SubScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
     {
diff --git a/modules/gpuarithm/src/cuda/sum.cu b/modules/gpuarithm/src/cuda/sum.cu
index 2af7692061..99541869f3 100644
--- a/modules/gpuarithm/src/cuda/sum.cu
+++ b/modules/gpuarithm/src/cuda/sum.cu
@@ -52,8 +52,8 @@
 
 #include "unroll_detail.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace sum
 {
diff --git a/modules/gpuarithm/src/cuda/threshold.cu b/modules/gpuarithm/src/cuda/threshold.cu
index 73ce8cee7a..cdaa8ccf6a 100644
--- a/modules/gpuarithm/src/cuda/threshold.cu
+++ b/modules/gpuarithm/src/cuda/threshold.cu
@@ -50,10 +50,10 @@
 
 #include "arithm_func_traits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
     {
diff --git a/modules/gpuarithm/src/cuda/transpose.cu b/modules/gpuarithm/src/cuda/transpose.cu
index b51dc201a3..6320c1b3b5 100644
--- a/modules/gpuarithm/src/cuda/transpose.cu
+++ b/modules/gpuarithm/src/cuda/transpose.cu
@@ -44,8 +44,8 @@
 
 #include "opencv2/core/cuda/common.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace arithm
 {
diff --git a/modules/gpuarithm/src/cuda/unroll_detail.hpp b/modules/gpuarithm/src/cuda/unroll_detail.hpp
index 993b10be5a..60492d7d69 100644
--- a/modules/gpuarithm/src/cuda/unroll_detail.hpp
+++ b/modules/gpuarithm/src/cuda/unroll_detail.hpp
@@ -75,11 +75,11 @@ namespace detail
         template <int BLOCK_SIZE, typename R>
         static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*> smem_tuple(R* smem)
         {
-            return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE);
+            return cv::cuda::cudev::smem_tuple(smem, smem + BLOCK_SIZE);
         }
 
         template <typename R>
-        static __device__ __forceinline__ thrust::tuple<typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&> tie(R& val)
+        static __device__ __forceinline__ thrust::tuple<typename cv::cuda::cudev::VecTraits<R>::elem_type&, typename cv::cuda::cudev::VecTraits<R>::elem_type&> tie(R& val)
         {
             return thrust::tie(val.x, val.y);
         }
@@ -95,11 +95,11 @@ namespace detail
         template <int BLOCK_SIZE, typename R>
         static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*> smem_tuple(R* smem)
         {
-            return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
+            return cv::cuda::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
         }
 
         template <typename R>
-        static __device__ __forceinline__ thrust::tuple<typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&> tie(R& val)
+        static __device__ __forceinline__ thrust::tuple<typename cv::cuda::cudev::VecTraits<R>::elem_type&, typename cv::cuda::cudev::VecTraits<R>::elem_type&, typename cv::cuda::cudev::VecTraits<R>::elem_type&> tie(R& val)
         {
             return thrust::tie(val.x, val.y, val.z);
         }
@@ -115,11 +115,11 @@ namespace detail
         template <int BLOCK_SIZE, typename R>
         static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*, volatile R*> smem_tuple(R* smem)
         {
-            return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
+            return cv::cuda::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
         }
 
         template <typename R>
-        static __device__ __forceinline__ thrust::tuple<typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&> tie(R& val)
+        static __device__ __forceinline__ thrust::tuple<typename cv::cuda::cudev::VecTraits<R>::elem_type&, typename cv::cuda::cudev::VecTraits<R>::elem_type&, typename cv::cuda::cudev::VecTraits<R>::elem_type&, typename cv::cuda::cudev::VecTraits<R>::elem_type&> tie(R& val)
         {
             return thrust::tie(val.x, val.y, val.z, val.w);
         }
diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp
index 20473de381..542ec0d65d 100644
--- a/modules/gpuarithm/src/element_operations.cpp
+++ b/modules/gpuarithm/src/element_operations.cpp
@@ -43,47 +43,47 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::subtract(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::multiply(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::divide(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::absdiff(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::subtract(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::multiply(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::divide(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::absdiff(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::abs(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::sqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::sqrt(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::exp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::log(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::pow(InputArray, double, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::abs(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::sqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::sqrt(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::exp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::log(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::pow(InputArray, double, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::compare(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::compare(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::bitwise_not(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::bitwise_or(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::bitwise_and(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::bitwise_xor(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::bitwise_not(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::bitwise_or(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::bitwise_and(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::bitwise_xor(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::rshift(InputArray, Scalar_<int>, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::lshift(InputArray, Scalar_<int>, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::rshift(InputArray, Scalar_<int>, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::lshift(InputArray, Scalar_<int>, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::min(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::max(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::min(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::max(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::addWeighted(InputArray, double, InputArray, double, double, OutputArray, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::addWeighted(InputArray, double, InputArray, double, double, OutputArray, int, Stream&) { throw_no_cuda(); }
 
-double cv::gpu::threshold(InputArray, OutputArray, double, double, int, Stream&) {throw_no_cuda(); return 0.0;}
+double cv::cuda::threshold(InputArray, OutputArray, double, double, int, Stream&) {throw_no_cuda(); return 0.0;}
 
-void cv::gpu::magnitude(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::magnitude(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::magnitudeSqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::magnitudeSqr(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::phase(InputArray, InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
-void cv::gpu::cartToPolar(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
-void cv::gpu::polarToCart(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
+void cv::cuda::magnitude(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::magnitude(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::magnitudeSqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::magnitudeSqr(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::phase(InputArray, InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
+void cv::cuda::cartToPolar(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
+void cv::cuda::polarToCart(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
 
 #else
 
@@ -579,7 +579,7 @@ static void addScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const Gp
     func(src, val[0], dst, mask, stream);
 }
 
-void cv::gpu::add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream)
+void cv::cuda::add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1.0, dtype, stream, addMat, addScalar);
 }
@@ -830,7 +830,7 @@ static void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, cons
     func(src, val[0], inv, dst, mask, stream);
 }
 
-void cv::gpu::subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream)
+void cv::cuda::subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1.0, dtype, stream, subMat, subScalar);
 }
@@ -1052,7 +1052,7 @@ static void mulScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const Gp
     func(src, val[0], dst, stream);
 }
 
-void cv::gpu::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream)
+void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream)
 {
     if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
     {
@@ -1311,7 +1311,7 @@ static void divScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, cons
     func(src, val[0], inv, dst, stream);
 }
 
-void cv::gpu::divide(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream)
+void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream)
 {
     if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
     {
@@ -1446,7 +1446,7 @@ static void absDiffScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, cons
     funcs[depth](src, val[0], dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
+void cv::cuda::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
 {
     arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, absDiffMat, absDiffScalar);
 }
@@ -1460,7 +1460,7 @@ namespace arithm
     void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 }
 
-void cv::gpu::abs(InputArray _src, OutputArray _dst, Stream& stream)
+void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream)
 {
     using namespace arithm;
 
@@ -1504,7 +1504,7 @@ namespace arithm
     void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 }
 
-void cv::gpu::sqr(InputArray _src, OutputArray _dst, Stream& stream)
+void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream)
 {
     using namespace arithm;
 
@@ -1548,7 +1548,7 @@ namespace arithm
     void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 }
 
-void cv::gpu::sqrt(InputArray _src, OutputArray _dst, Stream& stream)
+void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream)
 {
     using namespace arithm;
 
@@ -1592,7 +1592,7 @@ namespace arithm
     void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 }
 
-void cv::gpu::exp(InputArray _src, OutputArray _dst, Stream& stream)
+void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream)
 {
     using namespace arithm;
 
@@ -1636,7 +1636,7 @@ namespace arithm
     void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
 }
 
-void cv::gpu::log(InputArray _src, OutputArray _dst, Stream& stream)
+void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream)
 {
     using namespace arithm;
 
@@ -1679,7 +1679,7 @@ namespace arithm
     template<typename T> void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
 }
 
-void cv::gpu::pow(InputArray _src, double power, OutputArray _dst, Stream& stream)
+void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stream)
 {
     typedef void (*func_t)(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
     static const func_t funcs[] =
@@ -1860,7 +1860,7 @@ static void cmpScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, cons
     funcs[depth][cmpop](src, cn, val.val, dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream)
+void cv::cuda::compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream)
 {
     arithm_op(src1, src2, dst, noArray(), 1.0, CV_8U, stream, cmpMat, cmpScalar, cmpop);
 }
@@ -1873,7 +1873,7 @@ namespace arithm
     template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
 }
 
-void cv::gpu::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& _stream)
+void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& _stream)
 {
     using namespace arithm;
 
@@ -2122,17 +2122,17 @@ static void bitScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const Gp
     funcs[op][depth][cn - 1](src, val, dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
+void cv::cuda::bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_OR);
 }
 
-void cv::gpu::bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
+void cv::cuda::bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_AND);
 }
 
-void cv::gpu::bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
+void cv::cuda::bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
 {
     arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_XOR);
 }
@@ -2193,7 +2193,7 @@ namespace
     };
 }
 
-void cv::gpu::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
+void cv::cuda::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[5][4] =
@@ -2216,7 +2216,7 @@ void cv::gpu::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream
     funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
+void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[5][4] =
@@ -2404,12 +2404,12 @@ void minMaxScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat
     funcs[op][depth](src, cast_func[depth](val[0]), dst, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
+void cv::cuda::min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
 {
     arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MIN_OP);
 }
 
-void cv::gpu::max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
+void cv::cuda::max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
 {
     arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MAX_OP);
 }
@@ -2423,7 +2423,7 @@ namespace arithm
     void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
 }
 
-void cv::gpu::addWeighted(InputArray _src1, double alpha, InputArray _src2, double beta, double gamma, OutputArray _dst, int ddepth, Stream& stream)
+void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, double beta, double gamma, OutputArray _dst, int ddepth, Stream& stream)
 {
     typedef void (*func_t)(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
     static const func_t funcs[7][7][7] =
@@ -2934,7 +2934,7 @@ namespace arithm
     void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
 }
 
-double cv::gpu::threshold(InputArray _src, OutputArray _dst, double thresh, double maxVal, int type, Stream& _stream)
+double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, double maxVal, int type, Stream& _stream)
 {
     GpuMat src = _src.getGpuMat();
 
@@ -3018,7 +3018,7 @@ namespace
     }
 }
 
-void cv::gpu::magnitude(InputArray _src, OutputArray _dst, Stream& stream)
+void cv::cuda::magnitude(InputArray _src, OutputArray _dst, Stream& stream)
 {
     GpuMat src = _src.getGpuMat();
 
@@ -3028,7 +3028,7 @@ void cv::gpu::magnitude(InputArray _src, OutputArray _dst, Stream& stream)
     npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream)
+void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream)
 {
     GpuMat src = _src.getGpuMat();
 
@@ -3041,7 +3041,7 @@ void cv::gpu::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream)
 ////////////////////////////////////////////////////////////////////////
 // Polar <-> Cart
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace mathfunc
     {
@@ -3054,7 +3054,7 @@ namespace
 {
     void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
     {
-        using namespace ::cv::gpu::cudev::mathfunc;
+        using namespace ::cv::cuda::cudev::mathfunc;
 
         CV_Assert(x.size() == y.size() && x.type() == y.type());
         CV_Assert(x.depth() == CV_32F);
@@ -3069,7 +3069,7 @@ namespace
 
     void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
     {
-        using namespace ::cv::gpu::cudev::mathfunc;
+        using namespace ::cv::cuda::cudev::mathfunc;
 
         CV_Assert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
         CV_Assert(mag.depth() == CV_32F);
@@ -3083,7 +3083,7 @@ namespace
     }
 }
 
-void cv::gpu::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
+void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
 {
     GpuMat x = _x.getGpuMat();
     GpuMat y = _y.getGpuMat();
@@ -3094,7 +3094,7 @@ void cv::gpu::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream&
     cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
+void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
 {
     GpuMat x = _x.getGpuMat();
     GpuMat y = _y.getGpuMat();
@@ -3105,7 +3105,7 @@ void cv::gpu::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Strea
     cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream)
+void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream)
 {
     GpuMat x = _x.getGpuMat();
     GpuMat y = _y.getGpuMat();
@@ -3116,7 +3116,7 @@ void cv::gpu::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleIn
     cartToPolar_caller(x, y, 0, false, &dst, angleInDegrees, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
+void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
 {
     GpuMat x = _x.getGpuMat();
     GpuMat y = _y.getGpuMat();
@@ -3130,7 +3130,7 @@ void cv::gpu::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Output
     cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& stream)
+void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& stream)
 {
     GpuMat mag = _mag.getGpuMat();
     GpuMat angle = _angle.getGpuMat();
diff --git a/modules/gpuarithm/src/reductions.cpp b/modules/gpuarithm/src/reductions.cpp
index 248fa9a4e7..e6eb85d073 100644
--- a/modules/gpuarithm/src/reductions.cpp
+++ b/modules/gpuarithm/src/reductions.cpp
@@ -43,32 +43,32 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-double cv::gpu::norm(InputArray, int, InputArray, GpuMat&) { throw_no_cuda(); return 0.0; }
-double cv::gpu::norm(InputArray, InputArray, GpuMat&, int) { throw_no_cuda(); return 0.0; }
+double cv::cuda::norm(InputArray, int, InputArray, GpuMat&) { throw_no_cuda(); return 0.0; }
+double cv::cuda::norm(InputArray, InputArray, GpuMat&, int) { throw_no_cuda(); return 0.0; }
 
-Scalar cv::gpu::sum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
-Scalar cv::gpu::absSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
-Scalar cv::gpu::sqrSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
+Scalar cv::cuda::sum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
+Scalar cv::cuda::absSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
+Scalar cv::cuda::sqrSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
 
-void cv::gpu::minMax(InputArray, double*, double*, InputArray, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::minMaxLoc(InputArray, double*, double*, Point*, Point*, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::minMax(InputArray, double*, double*, InputArray, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::minMaxLoc(InputArray, double*, double*, Point*, Point*, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); }
 
-int cv::gpu::countNonZero(InputArray, GpuMat&) { throw_no_cuda(); return 0; }
+int cv::cuda::countNonZero(InputArray, GpuMat&) { throw_no_cuda(); return 0; }
 
-void cv::gpu::reduce(InputArray, OutputArray, int, int, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::reduce(InputArray, OutputArray, int, int, int, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::meanStdDev(InputArray, Scalar&, Scalar&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&, GpuMat&) { throw_no_cuda(); }
 
-void cv::gpu::rectStdDev(InputArray, InputArray, OutputArray, Rect, Stream&) { throw_no_cuda(); }
+void cv::cuda::rectStdDev(InputArray, InputArray, OutputArray, Rect, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::normalize(InputArray, OutputArray, double, double, int, int, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::normalize(InputArray, OutputArray, double, double, int, int, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); }
 
-void cv::gpu::integral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::sqrIntegral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::integral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::sqrIntegral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); }
 
 #else
 
@@ -111,7 +111,7 @@ namespace
 ////////////////////////////////////////////////////////////////////////
 // norm
 
-double cv::gpu::norm(InputArray _src, int normType, InputArray _mask, GpuMat& buf)
+double cv::cuda::norm(InputArray _src, int normType, InputArray _mask, GpuMat& buf)
 {
     GpuMat src = _src.getGpuMat();
     GpuMat mask = _mask.getGpuMat();
@@ -122,18 +122,18 @@ double cv::gpu::norm(InputArray _src, int normType, InputArray _mask, GpuMat& bu
     GpuMat src_single_channel = src.reshape(1);
 
     if (normType == NORM_L1)
-        return gpu::absSum(src_single_channel, mask, buf)[0];
+        return cuda::absSum(src_single_channel, mask, buf)[0];
 
     if (normType == NORM_L2)
-        return std::sqrt(gpu::sqrSum(src_single_channel, mask, buf)[0]);
+        return std::sqrt(cuda::sqrSum(src_single_channel, mask, buf)[0]);
 
     // NORM_INF
     double min_val, max_val;
-    gpu::minMax(src_single_channel, &min_val, &max_val, mask, buf);
+    cuda::minMax(src_single_channel, &min_val, &max_val, mask, buf);
     return std::max(std::abs(min_val), std::abs(max_val));
 }
 
-double cv::gpu::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType)
+double cv::cuda::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType)
 {
 #if CUDA_VERSION < 5050
     (void) buf;
@@ -203,7 +203,7 @@ namespace sum
     void runSqr(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask);
 }
 
-Scalar cv::gpu::sum(InputArray _src, InputArray _mask, GpuMat& buf)
+Scalar cv::cuda::sum(InputArray _src, InputArray _mask, GpuMat& buf)
 {
     GpuMat src = _src.getGpuMat();
     GpuMat mask = _mask.getGpuMat();
@@ -241,7 +241,7 @@ Scalar cv::gpu::sum(InputArray _src, InputArray _mask, GpuMat& buf)
     return Scalar(result[0], result[1], result[2], result[3]);
 }
 
-Scalar cv::gpu::absSum(InputArray _src, InputArray _mask, GpuMat& buf)
+Scalar cv::cuda::absSum(InputArray _src, InputArray _mask, GpuMat& buf)
 {
     GpuMat src = _src.getGpuMat();
     GpuMat mask = _mask.getGpuMat();
@@ -279,7 +279,7 @@ Scalar cv::gpu::absSum(InputArray _src, InputArray _mask, GpuMat& buf)
     return Scalar(result[0], result[1], result[2], result[3]);
 }
 
-Scalar cv::gpu::sqrSum(InputArray _src, InputArray _mask, GpuMat& buf)
+Scalar cv::cuda::sqrSum(InputArray _src, InputArray _mask, GpuMat& buf)
 {
     GpuMat src = _src.getGpuMat();
     GpuMat mask = _mask.getGpuMat();
@@ -328,7 +328,7 @@ namespace minMax
     void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
 }
 
-void cv::gpu::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask, GpuMat& buf)
+void cv::cuda::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask, GpuMat& buf)
 {
     GpuMat src = _src.getGpuMat();
     GpuMat mask = _mask.getGpuMat();
@@ -375,7 +375,7 @@ namespace minMaxLoc
     void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
 }
 
-void cv::gpu::minMaxLoc(InputArray _src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
+void cv::cuda::minMaxLoc(InputArray _src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
                         InputArray _mask, GpuMat& valBuf, GpuMat& locBuf)
 {
     GpuMat src = _src.getGpuMat();
@@ -425,7 +425,7 @@ namespace countNonZero
     int run(const PtrStepSzb src, PtrStep<unsigned int> buf);
 }
 
-int cv::gpu::countNonZero(InputArray _src, GpuMat& buf)
+int cv::cuda::countNonZero(InputArray _src, GpuMat& buf)
 {
     GpuMat src = _src.getGpuMat();
 
@@ -470,7 +470,7 @@ namespace reduce
     void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
 }
 
-void cv::gpu::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream)
+void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream)
 {
     GpuMat src = _src.getGpuMat();
 
@@ -643,7 +643,7 @@ void cv::gpu::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, i
 ////////////////////////////////////////////////////////////////////////
 // meanStdDev
 
-void cv::gpu::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat& buf)
+void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat& buf)
 {
     GpuMat src = _src.getGpuMat();
 
@@ -678,7 +678,7 @@ void cv::gpu::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat&
 //////////////////////////////////////////////////////////////////////////////
 // rectStdDev
 
-void cv::gpu::rectStdDev(InputArray _src, InputArray _sqr, OutputArray _dst, Rect rect, Stream& _stream)
+void cv::cuda::rectStdDev(InputArray _src, InputArray _sqr, OutputArray _dst, Rect rect, Stream& _stream)
 {
     GpuMat src = _src.getGpuMat();
     GpuMat sqr = _sqr.getGpuMat();
@@ -712,7 +712,7 @@ void cv::gpu::rectStdDev(InputArray _src, InputArray _sqr, OutputArray _dst, Rec
 ////////////////////////////////////////////////////////////////////////
 // normalize
 
-void cv::gpu::normalize(InputArray _src, OutputArray dst, double a, double b, int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf)
+void cv::cuda::normalize(InputArray _src, OutputArray dst, double a, double b, int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf)
 {
     GpuMat src = _src.getGpuMat();
 
@@ -722,13 +722,13 @@ void cv::gpu::normalize(InputArray _src, OutputArray dst, double a, double b, in
     {
         double smin = 0, smax = 0;
         double dmin = std::min(a, b), dmax = std::max(a, b);
-        gpu::minMax(src, &smin, &smax, mask, norm_buf);
+        cuda::minMax(src, &smin, &smax, mask, norm_buf);
         scale = (dmax - dmin) * (smax - smin > std::numeric_limits<double>::epsilon() ? 1.0 / (smax - smin) : 0.0);
         shift = dmin - smin * scale;
     }
     else if (norm_type == NORM_L2 || norm_type == NORM_L1 || norm_type == NORM_INF)
     {
-        scale = gpu::norm(src, norm_type, mask, norm_buf);
+        scale = cuda::norm(src, norm_type, mask, norm_buf);
         scale = scale > std::numeric_limits<double>::epsilon() ? a / scale : 0.0;
         shift = 0;
     }
@@ -751,7 +751,7 @@ void cv::gpu::normalize(InputArray _src, OutputArray dst, double a, double b, in
 ////////////////////////////////////////////////////////////////////////
 // integral
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -759,7 +759,7 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream& _stream)
+void cv::cuda::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream& _stream)
 {
     GpuMat src = _src.getGpuMat();
 
@@ -776,7 +776,7 @@ void cv::gpu::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream
     {
         ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
 
-        cv::gpu::cudev::imgproc::shfl_integral_gpu(src, buffer, stream);
+        cv::cuda::cudev::imgproc::shfl_integral_gpu(src, buffer, stream);
 
         _dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
         GpuMat dst = _dst.getGpuMat();
@@ -801,7 +801,7 @@ void cv::gpu::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream
         roiSize.height = src.rows;
 
         cudaDeviceProp prop;
-        cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
+        cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
 
         Ncv32u bufSize;
         ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
@@ -821,7 +821,7 @@ void cv::gpu::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream
 //////////////////////////////////////////////////////////////////////////////
 // sqrIntegral
 
-void cv::gpu::sqrIntegral(InputArray _src, OutputArray _dst, GpuMat& buf, Stream& _stream)
+void cv::cuda::sqrIntegral(InputArray _src, OutputArray _dst, GpuMat& buf, Stream& _stream)
 {
 #ifndef HAVE_OPENCV_GPULEGACY
     (void) _src;
@@ -838,7 +838,7 @@ void cv::gpu::sqrIntegral(InputArray _src, OutputArray _dst, GpuMat& buf, Stream
     roiSize.height = src.rows;
 
     cudaDeviceProp prop;
-    cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
+    cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
 
     Ncv32u bufSize;
     ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));
diff --git a/modules/gpuarithm/test/test_arithm.cpp b/modules/gpuarithm/test/test_arithm.cpp
index 0534e219d8..38611772e6 100644
--- a/modules/gpuarithm/test/test_arithm.cpp
+++ b/modules/gpuarithm/test/test_arithm.cpp
@@ -54,9 +54,9 @@ using namespace cvtest;
 CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
 #define ALL_GEMM_FLAGS testing::Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
 
-PARAM_TEST_CASE(GEMM, cv::gpu::DeviceInfo, cv::Size, MatType, GemmFlags, UseRoi)
+PARAM_TEST_CASE(GEMM, cv::cuda::DeviceInfo, cv::Size, MatType, GemmFlags, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     int flags;
@@ -70,7 +70,7 @@ PARAM_TEST_CASE(GEMM, cv::gpu::DeviceInfo, cv::Size, MatType, GemmFlags, UseRoi)
         flags = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -82,12 +82,12 @@ GPU_TEST_P(GEMM, Accuracy)
     double alpha = randomDouble(-10.0, 10.0);
     double beta = randomDouble(-10.0, 10.0);
 
-    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
+            cv::cuda::GpuMat dst;
+            cv::cuda::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
         }
         catch (const cv::Exception& e)
         {
@@ -98,8 +98,8 @@ GPU_TEST_P(GEMM, Accuracy)
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
+            cv::cuda::GpuMat dst;
+            cv::cuda::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
         }
         catch (const cv::Exception& e)
         {
@@ -108,8 +108,8 @@ GPU_TEST_P(GEMM, Accuracy)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-        cv::gpu::gemm(loadMat(src1, useRoi), loadMat(src2, useRoi), alpha, loadMat(src3, useRoi), beta, dst, flags);
+        cv::cuda::GpuMat dst = createMat(size, type, useRoi);
+        cv::cuda::gemm(loadMat(src1, useRoi), loadMat(src2, useRoi), alpha, loadMat(src3, useRoi), beta, dst, flags);
 
         cv::Mat dst_gold;
         cv::gemm(src1, src2, alpha, src3, beta, dst_gold, flags);
@@ -128,9 +128,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, GEMM, testing::Combine(
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // Integral
 
-PARAM_TEST_CASE(Integral, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+PARAM_TEST_CASE(Integral, cv::cuda::DeviceInfo, cv::Size, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     bool useRoi;
 
@@ -140,7 +140,7 @@ PARAM_TEST_CASE(Integral, cv::gpu::DeviceInfo, cv::Size, UseRoi)
         size = GET_PARAM(1);
         useRoi = GET_PARAM(2);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -148,8 +148,8 @@ GPU_TEST_P(Integral, Accuracy)
 {
     cv::Mat src = randomMat(size, CV_8UC1);
 
-    cv::gpu::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_32SC1, useRoi);
-    cv::gpu::integral(loadMat(src, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_32SC1, useRoi);
+    cv::cuda::integral(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
     cv::integral(src, dst_gold, CV_32S);
@@ -167,9 +167,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Integral, testing::Combine(
 
 CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
 
-PARAM_TEST_CASE(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags)
+PARAM_TEST_CASE(MulSpectrums, cv::cuda::DeviceInfo, cv::Size, DftFlags)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int flag;
 
@@ -181,7 +181,7 @@ PARAM_TEST_CASE(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags)
         size = GET_PARAM(1);
         flag = GET_PARAM(2);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         a = randomMat(size, CV_32FC2);
         b = randomMat(size, CV_32FC2);
@@ -190,8 +190,8 @@ PARAM_TEST_CASE(MulSpectrums, cv::gpu::DeviceInfo, cv::Size, DftFlags)
 
 GPU_TEST_P(MulSpectrums, Simple)
 {
-    cv::gpu::GpuMat c;
-    cv::gpu::mulSpectrums(loadMat(a), loadMat(b), c, flag, false);
+    cv::cuda::GpuMat c;
+    cv::cuda::mulSpectrums(loadMat(a), loadMat(b), c, flag, false);
 
     cv::Mat c_gold;
     cv::mulSpectrums(a, b, c_gold, flag, false);
@@ -203,8 +203,8 @@ GPU_TEST_P(MulSpectrums, Scaled)
 {
     float scale = 1.f / size.area();
 
-    cv::gpu::GpuMat c;
-    cv::gpu::mulAndScaleSpectrums(loadMat(a), loadMat(b), c, flag, scale, false);
+    cv::cuda::GpuMat c;
+    cv::cuda::mulAndScaleSpectrums(loadMat(a), loadMat(b), c, flag, scale, false);
 
     cv::Mat c_gold;
     cv::mulSpectrums(a, b, c_gold, flag, false);
@@ -221,15 +221,15 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, MulSpectrums, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////
 // Dft
 
-struct Dft : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct Dft : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -244,14 +244,14 @@ namespace
         cv::Mat b_gold;
         cv::dft(a, b_gold, flags);
 
-        cv::gpu::GpuMat d_b;
-        cv::gpu::GpuMat d_b_data;
+        cv::cuda::GpuMat d_b;
+        cv::cuda::GpuMat d_b_data;
         if (inplace)
         {
             d_b_data.create(1, a.size().area(), CV_32FC2);
-            d_b = cv::gpu::GpuMat(a.rows, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
+            d_b = cv::cuda::GpuMat(a.rows, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
         }
-        cv::gpu::dft(loadMat(a), d_b, cv::Size(cols, rows), flags);
+        cv::cuda::dft(loadMat(a), d_b, cv::Size(cols, rows), flags);
 
         EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
         ASSERT_EQ(CV_32F, d_b.depth());
@@ -293,26 +293,26 @@ namespace
 
         cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC1, 0.0, 10.0);
 
-        cv::gpu::GpuMat d_b, d_c;
-        cv::gpu::GpuMat d_b_data, d_c_data;
+        cv::cuda::GpuMat d_b, d_c;
+        cv::cuda::GpuMat d_b_data, d_c_data;
         if (inplace)
         {
             if (a.cols == 1)
             {
                 d_b_data.create(1, (a.rows / 2 + 1) * a.cols, CV_32FC2);
-                d_b = cv::gpu::GpuMat(a.rows / 2 + 1, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
+                d_b = cv::cuda::GpuMat(a.rows / 2 + 1, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
             }
             else
             {
                 d_b_data.create(1, a.rows * (a.cols / 2 + 1), CV_32FC2);
-                d_b = cv::gpu::GpuMat(a.rows, a.cols / 2 + 1, CV_32FC2, d_b_data.ptr(), (a.cols / 2 + 1) * d_b_data.elemSize());
+                d_b = cv::cuda::GpuMat(a.rows, a.cols / 2 + 1, CV_32FC2, d_b_data.ptr(), (a.cols / 2 + 1) * d_b_data.elemSize());
             }
             d_c_data.create(1, a.size().area(), CV_32F);
-            d_c = cv::gpu::GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
+            d_c = cv::cuda::GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
         }
 
-        cv::gpu::dft(loadMat(a), d_b, cv::Size(cols, rows), 0);
-        cv::gpu::dft(d_b, d_c, cv::Size(cols, rows), cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
+        cv::cuda::dft(loadMat(a), d_b, cv::Size(cols, rows), 0);
+        cv::cuda::dft(d_b, d_c, cv::Size(cols, rows), cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
 
         EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
         EXPECT_TRUE(!inplace || d_c.ptr() == d_c_data.ptr());
@@ -396,9 +396,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(Ccorr, bool)
 }
 
-PARAM_TEST_CASE(Convolve, cv::gpu::DeviceInfo, cv::Size, KSize, Ccorr)
+PARAM_TEST_CASE(Convolve, cv::cuda::DeviceInfo, cv::Size, KSize, Ccorr)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int ksize;
     bool ccorr;
@@ -410,7 +410,7 @@ PARAM_TEST_CASE(Convolve, cv::gpu::DeviceInfo, cv::Size, KSize, Ccorr)
         ksize = GET_PARAM(2);
         ccorr = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -419,9 +419,9 @@ GPU_TEST_P(Convolve, Accuracy)
     cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0);
     cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0);
 
-    cv::Ptr<cv::gpu::Convolution> conv = cv::gpu::createConvolution();
+    cv::Ptr<cv::cuda::Convolution> conv = cv::cuda::createConvolution();
 
-    cv::gpu::GpuMat dst;
+    cv::cuda::GpuMat dst;
     conv->convolve(loadMat(src), loadMat(kernel), dst, ccorr);
 
     cv::Mat dst_gold;
diff --git a/modules/gpuarithm/test/test_core.cpp b/modules/gpuarithm/test/test_core.cpp
index d465aa4634..063e4d968c 100644
--- a/modules/gpuarithm/test/test_core.cpp
+++ b/modules/gpuarithm/test/test_core.cpp
@@ -49,9 +49,9 @@ using namespace cvtest;
 ////////////////////////////////////////////////////////////////////////////////
 // Merge
 
-PARAM_TEST_CASE(Merge, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
+PARAM_TEST_CASE(Merge, cv::cuda::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     int channels;
@@ -65,7 +65,7 @@ PARAM_TEST_CASE(Merge, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi
         channels = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -76,16 +76,16 @@ GPU_TEST_P(Merge, Accuracy)
     for (int i = 0; i < channels; ++i)
         src.push_back(cv::Mat(size, depth, cv::Scalar::all(i)));
 
-    std::vector<cv::gpu::GpuMat> d_src;
+    std::vector<cv::cuda::GpuMat> d_src;
     for (int i = 0; i < channels; ++i)
         d_src.push_back(loadMat(src[i], useRoi));
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::merge(d_src, dst);
+            cv::cuda::GpuMat dst;
+            cv::cuda::merge(d_src, dst);
         }
         catch (const cv::Exception& e)
         {
@@ -94,8 +94,8 @@ GPU_TEST_P(Merge, Accuracy)
     }
     else
     {
-        cv::gpu::GpuMat dst;
-        cv::gpu::merge(d_src, dst);
+        cv::cuda::GpuMat dst;
+        cv::cuda::merge(d_src, dst);
 
         cv::Mat dst_gold;
         cv::merge(src, dst_gold);
@@ -114,9 +114,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Merge, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Split
 
-PARAM_TEST_CASE(Split, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
+PARAM_TEST_CASE(Split, cv::cuda::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     int channels;
@@ -132,7 +132,7 @@ PARAM_TEST_CASE(Split, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi
         channels = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         type = CV_MAKE_TYPE(depth, channels);
     }
@@ -142,12 +142,12 @@ GPU_TEST_P(Split, Accuracy)
 {
     cv::Mat src = randomMat(size, type);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            std::vector<cv::gpu::GpuMat> dst;
-            cv::gpu::split(loadMat(src), dst);
+            std::vector<cv::cuda::GpuMat> dst;
+            cv::cuda::split(loadMat(src), dst);
         }
         catch (const cv::Exception& e)
         {
@@ -156,8 +156,8 @@ GPU_TEST_P(Split, Accuracy)
     }
     else
     {
-        std::vector<cv::gpu::GpuMat> dst;
-        cv::gpu::split(loadMat(src, useRoi), dst);
+        std::vector<cv::cuda::GpuMat> dst;
+        cv::cuda::split(loadMat(src, useRoi), dst);
 
         std::vector<cv::Mat> dst_gold;
         cv::split(src, dst_gold);
@@ -181,9 +181,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Split, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Transpose
 
-PARAM_TEST_CASE(Transpose, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
+PARAM_TEST_CASE(Transpose, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     bool useRoi;
@@ -195,7 +195,7 @@ PARAM_TEST_CASE(Transpose, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
         type = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -203,12 +203,12 @@ GPU_TEST_P(Transpose, Accuracy)
 {
     cv::Mat src = randomMat(size, type);
 
-    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::transpose(loadMat(src), dst);
+            cv::cuda::GpuMat dst;
+            cv::cuda::transpose(loadMat(src), dst);
         }
         catch (const cv::Exception& e)
         {
@@ -217,8 +217,8 @@ GPU_TEST_P(Transpose, Accuracy)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(cv::Size(size.height, size.width), type, useRoi);
-        cv::gpu::transpose(loadMat(src, useRoi), dst);
+        cv::cuda::GpuMat dst = createMat(cv::Size(size.height, size.width), type, useRoi);
+        cv::cuda::transpose(loadMat(src, useRoi), dst);
 
         cv::Mat dst_gold;
         cv::transpose(src, dst_gold);
@@ -246,9 +246,9 @@ enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
 CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
 #define ALL_FLIP_CODES testing::Values(FlipCode(FLIP_BOTH), FlipCode(FLIP_X), FlipCode(FLIP_Y))
 
-PARAM_TEST_CASE(Flip, cv::gpu::DeviceInfo, cv::Size, MatType, FlipCode, UseRoi)
+PARAM_TEST_CASE(Flip, cv::cuda::DeviceInfo, cv::Size, MatType, FlipCode, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     int flip_code;
@@ -262,7 +262,7 @@ PARAM_TEST_CASE(Flip, cv::gpu::DeviceInfo, cv::Size, MatType, FlipCode, UseRoi)
         flip_code = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -270,8 +270,8 @@ GPU_TEST_P(Flip, Accuracy)
 {
     cv::Mat src = randomMat(size, type);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-    cv::gpu::flip(loadMat(src, useRoi), dst, flip_code);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::flip(loadMat(src, useRoi), dst, flip_code);
 
     cv::Mat dst_gold;
     cv::flip(src, dst_gold, flip_code);
@@ -300,9 +300,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Flip, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // LUT
 
-PARAM_TEST_CASE(LUT, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
+PARAM_TEST_CASE(LUT, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     bool useRoi;
@@ -314,7 +314,7 @@ PARAM_TEST_CASE(LUT, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
         type = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -323,9 +323,9 @@ GPU_TEST_P(LUT, OneChannel)
     cv::Mat src = randomMat(size, type);
     cv::Mat lut = randomMat(cv::Size(256, 1), CV_8UC1);
 
-    cv::Ptr<cv::gpu::LookUpTable> lutAlg = cv::gpu::createLookUpTable(lut);
+    cv::Ptr<cv::cuda::LookUpTable> lutAlg = cv::cuda::createLookUpTable(lut);
 
-    cv::gpu::GpuMat dst = createMat(size, CV_MAKE_TYPE(lut.depth(), src.channels()));
+    cv::cuda::GpuMat dst = createMat(size, CV_MAKE_TYPE(lut.depth(), src.channels()));
     lutAlg->transform(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
@@ -339,9 +339,9 @@ GPU_TEST_P(LUT, MultiChannel)
     cv::Mat src = randomMat(size, type);
     cv::Mat lut = randomMat(cv::Size(256, 1), CV_MAKE_TYPE(CV_8U, src.channels()));
 
-    cv::Ptr<cv::gpu::LookUpTable> lutAlg = cv::gpu::createLookUpTable(lut);
+    cv::Ptr<cv::cuda::LookUpTable> lutAlg = cv::cuda::createLookUpTable(lut);
 
-    cv::gpu::GpuMat dst = createMat(size, CV_MAKE_TYPE(lut.depth(), src.channels()), useRoi);
+    cv::cuda::GpuMat dst = createMat(size, CV_MAKE_TYPE(lut.depth(), src.channels()), useRoi);
     lutAlg->transform(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
@@ -364,9 +364,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(Border, int)
 }
 
-PARAM_TEST_CASE(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, MatType, Border, BorderType, UseRoi)
+PARAM_TEST_CASE(CopyMakeBorder, cv::cuda::DeviceInfo, cv::Size, MatType, Border, BorderType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     int border;
@@ -382,7 +382,7 @@ PARAM_TEST_CASE(CopyMakeBorder, cv::gpu::DeviceInfo, cv::Size, MatType, Border,
         borderType = GET_PARAM(4);
         useRoi = GET_PARAM(5);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -391,8 +391,8 @@ GPU_TEST_P(CopyMakeBorder, Accuracy)
     cv::Mat src = randomMat(size, type);
     cv::Scalar val = randomScalar(0, 255);
 
-    cv::gpu::GpuMat dst = createMat(cv::Size(size.width + 2 * border, size.height + 2 * border), type, useRoi);
-    cv::gpu::copyMakeBorder(loadMat(src, useRoi), dst, border, border, border, border, borderType, val);
+    cv::cuda::GpuMat dst = createMat(cv::Size(size.width + 2 * border, size.height + 2 * border), type, useRoi);
+    cv::cuda::copyMakeBorder(loadMat(src, useRoi), dst, border, border, border, border, borderType, val);
 
     cv::Mat dst_gold;
     cv::copyMakeBorder(src, dst_gold, border, border, border, border, borderType, val);
diff --git a/modules/gpuarithm/test/test_element_operations.cpp b/modules/gpuarithm/test/test_element_operations.cpp
index 61ea454ead..cf3d72f889 100644
--- a/modules/gpuarithm/test/test_element_operations.cpp
+++ b/modules/gpuarithm/test/test_element_operations.cpp
@@ -49,9 +49,9 @@ using namespace cvtest;
 ////////////////////////////////////////////////////////////////////////////////
 // Add_Array
 
-PARAM_TEST_CASE(Add_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
+PARAM_TEST_CASE(Add_Array, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     int channels;
@@ -68,7 +68,7 @@ PARAM_TEST_CASE(Add_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, Ma
         channels = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         stype = CV_MAKE_TYPE(depth.first, channels);
         dtype = CV_MAKE_TYPE(depth.second, channels);
@@ -80,12 +80,12 @@ GPU_TEST_P(Add_Array, Accuracy)
     cv::Mat mat1 = randomMat(size, stype);
     cv::Mat mat2 = randomMat(size, stype);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::add(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::add(loadMat(mat1), loadMat(mat2), dst, cv::cuda::GpuMat(), depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -94,9 +94,9 @@ GPU_TEST_P(Add_Array, Accuracy)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, dtype, useRoi);
         dst.setTo(cv::Scalar::all(0));
-        cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, cv::gpu::GpuMat(), depth.second);
+        cv::cuda::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, cv::cuda::GpuMat(), depth.second);
 
         cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
         cv::add(mat1, mat2, dst_gold, cv::noArray(), depth.second);
@@ -112,9 +112,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Array, testing::Combine(
     ALL_CHANNELS,
     WHOLE_SUBMAT));
 
-PARAM_TEST_CASE(Add_Array_Mask, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+PARAM_TEST_CASE(Add_Array_Mask, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     bool useRoi;
@@ -129,7 +129,7 @@ PARAM_TEST_CASE(Add_Array_Mask, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDept
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         stype = CV_MAKE_TYPE(depth.first, 1);
         dtype = CV_MAKE_TYPE(depth.second, 1);
@@ -142,12 +142,12 @@ GPU_TEST_P(Add_Array_Mask, Accuracy)
     cv::Mat mat2 = randomMat(size, stype);
     cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::add(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::add(loadMat(mat1), loadMat(mat2), dst, cv::cuda::GpuMat(), depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -156,9 +156,9 @@ GPU_TEST_P(Add_Array_Mask, Accuracy)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, dtype, useRoi);
         dst.setTo(cv::Scalar::all(0));
-        cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, loadMat(mask, useRoi), depth.second);
+        cv::cuda::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, loadMat(mask, useRoi), depth.second);
 
         cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
         cv::add(mat1, mat2, dst_gold, mask, depth.second);
@@ -176,9 +176,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Array_Mask, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Add_Scalar
 
-PARAM_TEST_CASE(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+PARAM_TEST_CASE(Add_Scalar, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     bool useRoi;
@@ -190,7 +190,7 @@ PARAM_TEST_CASE(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, M
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -199,12 +199,12 @@ GPU_TEST_P(Add_Scalar, WithOutMask)
     cv::Mat mat = randomMat(size, depth.first);
     cv::Scalar val = randomScalar(0, 255);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::add(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::add(loadMat(mat), val, dst, cv::cuda::GpuMat(), depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -213,9 +213,9 @@ GPU_TEST_P(Add_Scalar, WithOutMask)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
         dst.setTo(cv::Scalar::all(0));
-        cv::gpu::add(loadMat(mat, useRoi), val, dst, cv::gpu::GpuMat(), depth.second);
+        cv::cuda::add(loadMat(mat, useRoi), val, dst, cv::cuda::GpuMat(), depth.second);
 
         cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
         cv::add(mat, val, dst_gold, cv::noArray(), depth.second);
@@ -230,12 +230,12 @@ GPU_TEST_P(Add_Scalar, WithMask)
     cv::Scalar val = randomScalar(0, 255);
     cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::add(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::add(loadMat(mat), val, dst, cv::cuda::GpuMat(), depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -244,9 +244,9 @@ GPU_TEST_P(Add_Scalar, WithMask)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
         dst.setTo(cv::Scalar::all(0));
-        cv::gpu::add(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);
+        cv::cuda::add(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);
 
         cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
         cv::add(mat, val, dst_gold, mask, depth.second);
@@ -264,9 +264,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Scalar, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Add_Scalar_First
 
-PARAM_TEST_CASE(Add_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+PARAM_TEST_CASE(Add_Scalar_First, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     bool useRoi;
@@ -278,7 +278,7 @@ PARAM_TEST_CASE(Add_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDe
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -287,12 +287,12 @@ GPU_TEST_P(Add_Scalar_First, WithOutMask)
     cv::Mat mat = randomMat(size, depth.first);
     cv::Scalar val = randomScalar(0, 255);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::add(val, loadMat(mat), dst, cv::gpu::GpuMat(), depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::add(val, loadMat(mat), dst, cv::cuda::GpuMat(), depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -301,9 +301,9 @@ GPU_TEST_P(Add_Scalar_First, WithOutMask)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
         dst.setTo(cv::Scalar::all(0));
-        cv::gpu::add(val, loadMat(mat, useRoi), dst, cv::gpu::GpuMat(), depth.second);
+        cv::cuda::add(val, loadMat(mat, useRoi), dst, cv::cuda::GpuMat(), depth.second);
 
         cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
         cv::add(val, mat, dst_gold, cv::noArray(), depth.second);
@@ -318,12 +318,12 @@ GPU_TEST_P(Add_Scalar_First, WithMask)
     cv::Scalar val = randomScalar(0, 255);
     cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::add(val, loadMat(mat), dst, cv::gpu::GpuMat(), depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::add(val, loadMat(mat), dst, cv::cuda::GpuMat(), depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -332,9 +332,9 @@ GPU_TEST_P(Add_Scalar_First, WithMask)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
         dst.setTo(cv::Scalar::all(0));
-        cv::gpu::add(val, loadMat(mat, useRoi), dst, loadMat(mask, useRoi), depth.second);
+        cv::cuda::add(val, loadMat(mat, useRoi), dst, loadMat(mask, useRoi), depth.second);
 
         cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
         cv::add(val, mat, dst_gold, mask, depth.second);
@@ -352,9 +352,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Scalar_First, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Subtract_Array
 
-PARAM_TEST_CASE(Subtract_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
+PARAM_TEST_CASE(Subtract_Array, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     int channels;
@@ -371,7 +371,7 @@ PARAM_TEST_CASE(Subtract_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDept
         channels = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         stype = CV_MAKE_TYPE(depth.first, channels);
         dtype = CV_MAKE_TYPE(depth.second, channels);
@@ -383,12 +383,12 @@ GPU_TEST_P(Subtract_Array, Accuracy)
     cv::Mat mat1 = randomMat(size, stype);
     cv::Mat mat2 = randomMat(size, stype);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::subtract(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::subtract(loadMat(mat1), loadMat(mat2), dst, cv::cuda::GpuMat(), depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -397,9 +397,9 @@ GPU_TEST_P(Subtract_Array, Accuracy)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, dtype, useRoi);
         dst.setTo(cv::Scalar::all(0));
-        cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, cv::gpu::GpuMat(), depth.second);
+        cv::cuda::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, cv::cuda::GpuMat(), depth.second);
 
         cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
         cv::subtract(mat1, mat2, dst_gold, cv::noArray(), depth.second);
@@ -415,9 +415,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Array, testing::Combine(
     ALL_CHANNELS,
     WHOLE_SUBMAT));
 
-PARAM_TEST_CASE(Subtract_Array_Mask, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+PARAM_TEST_CASE(Subtract_Array_Mask, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     bool useRoi;
@@ -432,7 +432,7 @@ PARAM_TEST_CASE(Subtract_Array_Mask, cv::gpu::DeviceInfo, cv::Size, std::pair<Ma
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         stype = CV_MAKE_TYPE(depth.first, 1);
         dtype = CV_MAKE_TYPE(depth.second, 1);
@@ -445,12 +445,12 @@ GPU_TEST_P(Subtract_Array_Mask, Accuracy)
     cv::Mat mat2 = randomMat(size, stype);
     cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::subtract(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::subtract(loadMat(mat1), loadMat(mat2), dst, cv::cuda::GpuMat(), depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -459,9 +459,9 @@ GPU_TEST_P(Subtract_Array_Mask, Accuracy)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, dtype, useRoi);
         dst.setTo(cv::Scalar::all(0));
-        cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, loadMat(mask, useRoi), depth.second);
+        cv::cuda::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, loadMat(mask, useRoi), depth.second);
 
         cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
         cv::subtract(mat1, mat2, dst_gold, mask, depth.second);
@@ -479,9 +479,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Array_Mask, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Subtract_Scalar
 
-PARAM_TEST_CASE(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+PARAM_TEST_CASE(Subtract_Scalar, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     bool useRoi;
@@ -493,7 +493,7 @@ PARAM_TEST_CASE(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDep
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -502,12 +502,12 @@ GPU_TEST_P(Subtract_Scalar, WithOutMask)
     cv::Mat mat = randomMat(size, depth.first);
     cv::Scalar val = randomScalar(0, 255);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::subtract(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::subtract(loadMat(mat), val, dst, cv::cuda::GpuMat(), depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -516,9 +516,9 @@ GPU_TEST_P(Subtract_Scalar, WithOutMask)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
         dst.setTo(cv::Scalar::all(0));
-        cv::gpu::subtract(loadMat(mat, useRoi), val, dst, cv::gpu::GpuMat(), depth.second);
+        cv::cuda::subtract(loadMat(mat, useRoi), val, dst, cv::cuda::GpuMat(), depth.second);
 
         cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
         cv::subtract(mat, val, dst_gold, cv::noArray(), depth.second);
@@ -533,12 +533,12 @@ GPU_TEST_P(Subtract_Scalar, WithMask)
     cv::Scalar val = randomScalar(0, 255);
     cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::subtract(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::subtract(loadMat(mat), val, dst, cv::cuda::GpuMat(), depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -547,9 +547,9 @@ GPU_TEST_P(Subtract_Scalar, WithMask)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
         dst.setTo(cv::Scalar::all(0));
-        cv::gpu::subtract(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);
+        cv::cuda::subtract(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);
 
         cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
         cv::subtract(mat, val, dst_gold, mask, depth.second);
@@ -567,9 +567,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Scalar, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Subtract_Scalar_First
 
-PARAM_TEST_CASE(Subtract_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+PARAM_TEST_CASE(Subtract_Scalar_First, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     bool useRoi;
@@ -581,7 +581,7 @@ PARAM_TEST_CASE(Subtract_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair<
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -590,12 +590,12 @@ GPU_TEST_P(Subtract_Scalar_First, WithOutMask)
     cv::Mat mat = randomMat(size, depth.first);
     cv::Scalar val = randomScalar(0, 255);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::subtract(val, loadMat(mat), dst, cv::gpu::GpuMat(), depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::subtract(val, loadMat(mat), dst, cv::cuda::GpuMat(), depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -604,9 +604,9 @@ GPU_TEST_P(Subtract_Scalar_First, WithOutMask)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
         dst.setTo(cv::Scalar::all(0));
-        cv::gpu::subtract(val, loadMat(mat, useRoi), dst, cv::gpu::GpuMat(), depth.second);
+        cv::cuda::subtract(val, loadMat(mat, useRoi), dst, cv::cuda::GpuMat(), depth.second);
 
         cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
         cv::subtract(val, mat, dst_gold, cv::noArray(), depth.second);
@@ -621,12 +621,12 @@ GPU_TEST_P(Subtract_Scalar_First, WithMask)
     cv::Scalar val = randomScalar(0, 255);
     cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::subtract(val, loadMat(mat), dst, cv::gpu::GpuMat(), depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::subtract(val, loadMat(mat), dst, cv::cuda::GpuMat(), depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -635,9 +635,9 @@ GPU_TEST_P(Subtract_Scalar_First, WithMask)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
         dst.setTo(cv::Scalar::all(0));
-        cv::gpu::subtract(val, loadMat(mat, useRoi), dst, loadMat(mask, useRoi), depth.second);
+        cv::cuda::subtract(val, loadMat(mat, useRoi), dst, loadMat(mask, useRoi), depth.second);
 
         cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
         cv::subtract(val, mat, dst_gold, mask, depth.second);
@@ -655,9 +655,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Scalar_First, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Multiply_Array
 
-PARAM_TEST_CASE(Multiply_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
+PARAM_TEST_CASE(Multiply_Array, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     int channels;
@@ -674,7 +674,7 @@ PARAM_TEST_CASE(Multiply_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDept
         channels = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         stype = CV_MAKE_TYPE(depth.first, channels);
         dtype = CV_MAKE_TYPE(depth.second, channels);
@@ -686,12 +686,12 @@ GPU_TEST_P(Multiply_Array, WithOutScale)
     cv::Mat mat1 = randomMat(size, stype);
     cv::Mat mat2 = randomMat(size, stype);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::multiply(loadMat(mat1), loadMat(mat2), dst, 1, depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::multiply(loadMat(mat1), loadMat(mat2), dst, 1, depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -700,8 +700,8 @@ GPU_TEST_P(Multiply_Array, WithOutScale)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
-        cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, 1, depth.second);
+        cv::cuda::GpuMat dst = createMat(size, dtype, useRoi);
+        cv::cuda::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, 1, depth.second);
 
         cv::Mat dst_gold;
         cv::multiply(mat1, mat2, dst_gold, 1, depth.second);
@@ -716,12 +716,12 @@ GPU_TEST_P(Multiply_Array, WithScale)
     cv::Mat mat2 = randomMat(size, stype);
     double scale = randomDouble(0.0, 255.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::multiply(loadMat(mat1), loadMat(mat2), dst, scale, depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::multiply(loadMat(mat1), loadMat(mat2), dst, scale, depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -730,8 +730,8 @@ GPU_TEST_P(Multiply_Array, WithScale)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
-        cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);
+        cv::cuda::GpuMat dst = createMat(size, dtype, useRoi);
+        cv::cuda::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);
 
         cv::Mat dst_gold;
         cv::multiply(mat1, mat2, dst_gold, scale, depth.second);
@@ -750,9 +750,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Array, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Multiply_Array_Special
 
-PARAM_TEST_CASE(Multiply_Array_Special, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+PARAM_TEST_CASE(Multiply_Array_Special, cv::cuda::DeviceInfo, cv::Size, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     bool useRoi;
 
@@ -762,7 +762,7 @@ PARAM_TEST_CASE(Multiply_Array_Special, cv::gpu::DeviceInfo, cv::Size, UseRoi)
         size = GET_PARAM(1);
         useRoi = GET_PARAM(2);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -771,8 +771,8 @@ GPU_TEST_P(Multiply_Array_Special, Case_8UC4x_32FC1)
     cv::Mat mat1 = randomMat(size, CV_8UC4);
     cv::Mat mat2 = randomMat(size, CV_32FC1);
 
-    cv::gpu::GpuMat dst = createMat(size, CV_8UC4, useRoi);
-    cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, CV_8UC4, useRoi);
+    cv::cuda::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
 
     cv::Mat h_dst(dst);
 
@@ -808,8 +808,8 @@ GPU_TEST_P(Multiply_Array_Special, Case_16SC4x_32FC1)
     cv::Mat mat1 = randomMat(size, CV_16SC4);
     cv::Mat mat2 = randomMat(size, CV_32FC1);
 
-    cv::gpu::GpuMat dst = createMat(size, CV_16SC4, useRoi);
-    cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, CV_16SC4, useRoi);
+    cv::cuda::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
 
     cv::Mat h_dst(dst);
 
@@ -848,9 +848,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Array_Special, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Multiply_Scalar
 
-PARAM_TEST_CASE(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+PARAM_TEST_CASE(Multiply_Scalar, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     bool useRoi;
@@ -862,7 +862,7 @@ PARAM_TEST_CASE(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDep
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -871,12 +871,12 @@ GPU_TEST_P(Multiply_Scalar, WithOutScale)
     cv::Mat mat = randomMat(size, depth.first);
     cv::Scalar val = randomScalar(0, 255);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::multiply(loadMat(mat), val, dst, 1, depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::multiply(loadMat(mat), val, dst, 1, depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -885,8 +885,8 @@ GPU_TEST_P(Multiply_Scalar, WithOutScale)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::gpu::multiply(loadMat(mat, useRoi), val, dst, 1, depth.second);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::multiply(loadMat(mat, useRoi), val, dst, 1, depth.second);
 
         cv::Mat dst_gold;
         cv::multiply(mat, val, dst_gold, 1, depth.second);
@@ -902,12 +902,12 @@ GPU_TEST_P(Multiply_Scalar, WithScale)
     cv::Scalar val = randomScalar(0, 255);
     double scale = randomDouble(0.0, 255.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::multiply(loadMat(mat), val, dst, scale, depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::multiply(loadMat(mat), val, dst, scale, depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -916,8 +916,8 @@ GPU_TEST_P(Multiply_Scalar, WithScale)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::gpu::multiply(loadMat(mat, useRoi), val, dst, scale, depth.second);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::multiply(loadMat(mat, useRoi), val, dst, scale, depth.second);
 
         cv::Mat dst_gold;
         cv::multiply(mat, val, dst_gold, scale, depth.second);
@@ -935,9 +935,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Scalar, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Multiply_Scalar_First
 
-PARAM_TEST_CASE(Multiply_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+PARAM_TEST_CASE(Multiply_Scalar_First, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     bool useRoi;
@@ -949,7 +949,7 @@ PARAM_TEST_CASE(Multiply_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair<
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -958,12 +958,12 @@ GPU_TEST_P(Multiply_Scalar_First, WithOutScale)
     cv::Mat mat = randomMat(size, depth.first);
     cv::Scalar val = randomScalar(0, 255);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::multiply(val, loadMat(mat), dst, 1, depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::multiply(val, loadMat(mat), dst, 1, depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -972,8 +972,8 @@ GPU_TEST_P(Multiply_Scalar_First, WithOutScale)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::gpu::multiply(val, loadMat(mat, useRoi), dst, 1, depth.second);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::multiply(val, loadMat(mat, useRoi), dst, 1, depth.second);
 
         cv::Mat dst_gold;
         cv::multiply(val, mat, dst_gold, 1, depth.second);
@@ -989,12 +989,12 @@ GPU_TEST_P(Multiply_Scalar_First, WithScale)
     cv::Scalar val = randomScalar(0, 255);
     double scale = randomDouble(0.0, 255.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::multiply(val, loadMat(mat), dst, scale, depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::multiply(val, loadMat(mat), dst, scale, depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -1003,8 +1003,8 @@ GPU_TEST_P(Multiply_Scalar_First, WithScale)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::gpu::multiply(val, loadMat(mat, useRoi), dst, scale, depth.second);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::multiply(val, loadMat(mat, useRoi), dst, scale, depth.second);
 
         cv::Mat dst_gold;
         cv::multiply(val, mat, dst_gold, scale, depth.second);
@@ -1022,9 +1022,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Scalar_First, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Divide_Array
 
-PARAM_TEST_CASE(Divide_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
+PARAM_TEST_CASE(Divide_Array, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, Channels, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     int channels;
@@ -1041,7 +1041,7 @@ PARAM_TEST_CASE(Divide_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth,
         channels = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         stype = CV_MAKE_TYPE(depth.first, channels);
         dtype = CV_MAKE_TYPE(depth.second, channels);
@@ -1053,12 +1053,12 @@ GPU_TEST_P(Divide_Array, WithOutScale)
     cv::Mat mat1 = randomMat(size, stype);
     cv::Mat mat2 = randomMat(size, stype, 1.0, 255.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::divide(loadMat(mat1), loadMat(mat2), dst, 1, depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::divide(loadMat(mat1), loadMat(mat2), dst, 1, depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -1067,8 +1067,8 @@ GPU_TEST_P(Divide_Array, WithOutScale)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
-        cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, 1, depth.second);
+        cv::cuda::GpuMat dst = createMat(size, dtype, useRoi);
+        cv::cuda::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, 1, depth.second);
 
         cv::Mat dst_gold;
         cv::divide(mat1, mat2, dst_gold, 1, depth.second);
@@ -1083,12 +1083,12 @@ GPU_TEST_P(Divide_Array, WithScale)
     cv::Mat mat2 = randomMat(size, stype, 1.0, 255.0);
     double scale = randomDouble(0.0, 255.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::divide(loadMat(mat1), loadMat(mat2), dst, scale, depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::divide(loadMat(mat1), loadMat(mat2), dst, scale, depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -1097,8 +1097,8 @@ GPU_TEST_P(Divide_Array, WithScale)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
-        cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);
+        cv::cuda::GpuMat dst = createMat(size, dtype, useRoi);
+        cv::cuda::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);
 
         cv::Mat dst_gold;
         cv::divide(mat1, mat2, dst_gold, scale, depth.second);
@@ -1117,9 +1117,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Array, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Divide_Array_Special
 
-PARAM_TEST_CASE(Divide_Array_Special, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+PARAM_TEST_CASE(Divide_Array_Special, cv::cuda::DeviceInfo, cv::Size, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     bool useRoi;
 
@@ -1129,7 +1129,7 @@ PARAM_TEST_CASE(Divide_Array_Special, cv::gpu::DeviceInfo, cv::Size, UseRoi)
         size = GET_PARAM(1);
         useRoi = GET_PARAM(2);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -1138,8 +1138,8 @@ GPU_TEST_P(Divide_Array_Special, Case_8UC4x_32FC1)
     cv::Mat mat1 = randomMat(size, CV_8UC4);
     cv::Mat mat2 = randomMat(size, CV_32FC1, 1.0, 255.0);
 
-    cv::gpu::GpuMat dst = createMat(size, CV_8UC4, useRoi);
-    cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, CV_8UC4, useRoi);
+    cv::cuda::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
 
     cv::Mat h_dst(dst);
 
@@ -1175,8 +1175,8 @@ GPU_TEST_P(Divide_Array_Special, Case_16SC4x_32FC1)
     cv::Mat mat1 = randomMat(size, CV_16SC4);
     cv::Mat mat2 = randomMat(size, CV_32FC1, 1.0, 255.0);
 
-    cv::gpu::GpuMat dst = createMat(size, CV_16SC4, useRoi);
-    cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, CV_16SC4, useRoi);
+    cv::cuda::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst);
 
     cv::Mat h_dst(dst);
 
@@ -1215,9 +1215,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Array_Special, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Divide_Scalar
 
-PARAM_TEST_CASE(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+PARAM_TEST_CASE(Divide_Scalar, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     bool useRoi;
@@ -1229,7 +1229,7 @@ PARAM_TEST_CASE(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -1238,12 +1238,12 @@ GPU_TEST_P(Divide_Scalar, WithOutScale)
     cv::Mat mat = randomMat(size, depth.first);
     cv::Scalar val = randomScalar(1.0, 255.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::divide(loadMat(mat), val, dst, 1, depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::divide(loadMat(mat), val, dst, 1, depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -1252,8 +1252,8 @@ GPU_TEST_P(Divide_Scalar, WithOutScale)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::gpu::divide(loadMat(mat, useRoi), val, dst, 1, depth.second);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::divide(loadMat(mat, useRoi), val, dst, 1, depth.second);
 
         cv::Mat dst_gold;
         cv::divide(mat, val, dst_gold, 1, depth.second);
@@ -1268,12 +1268,12 @@ GPU_TEST_P(Divide_Scalar, WithScale)
     cv::Scalar val = randomScalar(1.0, 255.0);
     double scale = randomDouble(0.0, 255.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::divide(loadMat(mat), val, dst, scale, depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::divide(loadMat(mat), val, dst, scale, depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -1282,8 +1282,8 @@ GPU_TEST_P(Divide_Scalar, WithScale)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::gpu::divide(loadMat(mat, useRoi), val, dst, scale, depth.second);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::divide(loadMat(mat, useRoi), val, dst, scale, depth.second);
 
         cv::Mat dst_gold;
         cv::divide(mat, val, dst_gold, scale, depth.second);
@@ -1301,9 +1301,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Scalar, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Divide_Scalar_First
 
-PARAM_TEST_CASE(Divide_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
+PARAM_TEST_CASE(Divide_Scalar_First, cv::cuda::DeviceInfo, cv::Size, std::pair<MatDepth, MatDepth>, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     std::pair<MatDepth, MatDepth> depth;
     bool useRoi;
@@ -1315,7 +1315,7 @@ PARAM_TEST_CASE(Divide_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair<Ma
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -1324,12 +1324,12 @@ GPU_TEST_P(Divide_Scalar_First, Accuracy)
     double scale = randomDouble(0.0, 255.0);
     cv::Mat mat = randomMat(size, depth.first, 1.0, 255.0);
 
-    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::divide(scale, loadMat(mat), dst, depth.second);
+            cv::cuda::GpuMat dst;
+            cv::cuda::divide(scale, loadMat(mat), dst, depth.second);
         }
         catch (const cv::Exception& e)
         {
@@ -1338,8 +1338,8 @@ GPU_TEST_P(Divide_Scalar_First, Accuracy)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::gpu::divide(scale, loadMat(mat, useRoi), dst, depth.second);
+        cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
+        cv::cuda::divide(scale, loadMat(mat, useRoi), dst, depth.second);
 
         cv::Mat dst_gold;
         cv::divide(scale, mat, dst_gold, depth.second);
@@ -1357,9 +1357,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Scalar_First, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // AbsDiff
 
-PARAM_TEST_CASE(AbsDiff, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(AbsDiff, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -1371,7 +1371,7 @@ PARAM_TEST_CASE(AbsDiff, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -1380,12 +1380,12 @@ GPU_TEST_P(AbsDiff, Array)
     cv::Mat src1 = randomMat(size, depth);
     cv::Mat src2 = randomMat(size, depth);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::absdiff(loadMat(src1), loadMat(src2), dst);
+            cv::cuda::GpuMat dst;
+            cv::cuda::absdiff(loadMat(src1), loadMat(src2), dst);
         }
         catch (const cv::Exception& e)
         {
@@ -1394,8 +1394,8 @@ GPU_TEST_P(AbsDiff, Array)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::absdiff(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
+        cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+        cv::cuda::absdiff(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
 
         cv::Mat dst_gold;
         cv::absdiff(src1, src2, dst_gold);
@@ -1409,12 +1409,12 @@ GPU_TEST_P(AbsDiff, Scalar)
     cv::Mat src = randomMat(size, depth);
     cv::Scalar val = randomScalar(0.0, 255.0);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::absdiff(loadMat(src), val, dst);
+            cv::cuda::GpuMat dst;
+            cv::cuda::absdiff(loadMat(src), val, dst);
         }
         catch (const cv::Exception& e)
         {
@@ -1423,8 +1423,8 @@ GPU_TEST_P(AbsDiff, Scalar)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::absdiff(loadMat(src, useRoi), val, dst);
+        cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+        cv::cuda::absdiff(loadMat(src, useRoi), val, dst);
 
         cv::Mat dst_gold;
         cv::absdiff(src, val, dst_gold);
@@ -1438,12 +1438,12 @@ GPU_TEST_P(AbsDiff, Scalar_First)
     cv::Mat src = randomMat(size, depth);
     cv::Scalar val = randomScalar(0.0, 255.0);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::absdiff(val, loadMat(src), dst);
+            cv::cuda::GpuMat dst;
+            cv::cuda::absdiff(val, loadMat(src), dst);
         }
         catch (const cv::Exception& e)
         {
@@ -1452,8 +1452,8 @@ GPU_TEST_P(AbsDiff, Scalar_First)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::absdiff(val, loadMat(src, useRoi), dst);
+        cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+        cv::cuda::absdiff(val, loadMat(src, useRoi), dst);
 
         cv::Mat dst_gold;
         cv::absdiff(val, src, dst_gold);
@@ -1471,9 +1471,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, AbsDiff, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Abs
 
-PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(Abs, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -1485,7 +1485,7 @@ PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -1493,8 +1493,8 @@ GPU_TEST_P(Abs, Accuracy)
 {
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-    cv::gpu::abs(loadMat(src, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+    cv::cuda::abs(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold = cv::abs(src);
 
@@ -1510,9 +1510,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Abs, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Sqr
 
-PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(Sqr, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -1524,7 +1524,7 @@ PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -1532,8 +1532,8 @@ GPU_TEST_P(Sqr, Accuracy)
 {
     cv::Mat src = randomMat(size, depth, 0, depth == CV_8U ? 16 : 255);
 
-    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-    cv::gpu::sqr(loadMat(src, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+    cv::cuda::sqr(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
     cv::multiply(src, src, dst_gold);
@@ -1580,9 +1580,9 @@ namespace
     }
 }
 
-PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(Sqrt, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -1594,7 +1594,7 @@ PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -1602,8 +1602,8 @@ GPU_TEST_P(Sqrt, Accuracy)
 {
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-    cv::gpu::sqrt(loadMat(src, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+    cv::cuda::sqrt(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
     sqrtGold(src, dst_gold);
@@ -1650,9 +1650,9 @@ namespace
     }
 }
 
-PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(Log, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -1664,7 +1664,7 @@ PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -1672,8 +1672,8 @@ GPU_TEST_P(Log, Accuracy)
 {
     cv::Mat src = randomMat(size, depth, 1.0, 255.0);
 
-    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-    cv::gpu::log(loadMat(src, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+    cv::cuda::log(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
     logGold(src, dst_gold);
@@ -1730,9 +1730,9 @@ namespace
     }
 }
 
-PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(Exp, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -1744,7 +1744,7 @@ PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -1752,8 +1752,8 @@ GPU_TEST_P(Exp, Accuracy)
 {
     cv::Mat src = randomMat(size, depth, 0.0, 10.0);
 
-    cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-    cv::gpu::exp(loadMat(src, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+    cv::cuda::exp(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
     expGold(src, dst_gold);
@@ -1773,9 +1773,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Exp, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Pow
 
-PARAM_TEST_CASE(Pow, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(Pow, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -1787,7 +1787,7 @@ PARAM_TEST_CASE(Pow, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -1799,12 +1799,12 @@ GPU_TEST_P(Pow, Accuracy)
     if (src.depth() < CV_32F)
         power = static_cast<int>(power);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::pow(loadMat(src), power, dst);
+            cv::cuda::GpuMat dst;
+            cv::cuda::pow(loadMat(src), power, dst);
         }
         catch (const cv::Exception& e)
         {
@@ -1813,8 +1813,8 @@ GPU_TEST_P(Pow, Accuracy)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::pow(loadMat(src, useRoi), power, dst);
+        cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+        cv::cuda::pow(loadMat(src, useRoi), power, dst);
 
         cv::Mat dst_gold;
         cv::pow(src, power, dst_gold);
@@ -1835,9 +1835,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Pow, testing::Combine(
 CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
 #define ALL_CMP_CODES testing::Values(CmpCode(cv::CMP_EQ), CmpCode(cv::CMP_NE), CmpCode(cv::CMP_GT), CmpCode(cv::CMP_GE), CmpCode(cv::CMP_LT), CmpCode(cv::CMP_LE))
 
-PARAM_TEST_CASE(Compare_Array, cv::gpu::DeviceInfo, cv::Size, MatDepth, CmpCode, UseRoi)
+PARAM_TEST_CASE(Compare_Array, cv::cuda::DeviceInfo, cv::Size, MatDepth, CmpCode, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     int cmp_code;
@@ -1851,7 +1851,7 @@ PARAM_TEST_CASE(Compare_Array, cv::gpu::DeviceInfo, cv::Size, MatDepth, CmpCode,
         cmp_code = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -1860,12 +1860,12 @@ GPU_TEST_P(Compare_Array, Accuracy)
     cv::Mat src1 = randomMat(size, depth);
     cv::Mat src2 = randomMat(size, depth);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::compare(loadMat(src1), loadMat(src2), dst, cmp_code);
+            cv::cuda::GpuMat dst;
+            cv::cuda::compare(loadMat(src1), loadMat(src2), dst, cmp_code);
         }
         catch (const cv::Exception& e)
         {
@@ -1874,8 +1874,8 @@ GPU_TEST_P(Compare_Array, Accuracy)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, CV_8UC1, useRoi);
-        cv::gpu::compare(loadMat(src1, useRoi), loadMat(src2, useRoi), dst, cmp_code);
+        cv::cuda::GpuMat dst = createMat(size, CV_8UC1, useRoi);
+        cv::cuda::compare(loadMat(src1, useRoi), loadMat(src2, useRoi), dst, cmp_code);
 
         cv::Mat dst_gold;
         cv::compare(src1, src2, dst_gold, cmp_code);
@@ -1937,9 +1937,9 @@ namespace
     }
 }
 
-PARAM_TEST_CASE(Compare_Scalar, cv::gpu::DeviceInfo, cv::Size, MatType, CmpCode, UseRoi)
+PARAM_TEST_CASE(Compare_Scalar, cv::cuda::DeviceInfo, cv::Size, MatType, CmpCode, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     int cmp_code;
@@ -1953,7 +1953,7 @@ PARAM_TEST_CASE(Compare_Scalar, cv::gpu::DeviceInfo, cv::Size, MatType, CmpCode,
         cmp_code = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -1970,12 +1970,12 @@ GPU_TEST_P(Compare_Scalar, Accuracy)
         sc.val[3] = cvRound(sc.val[3]);
     }
 
-    if (src.depth() == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (src.depth() == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::compare(loadMat(src), sc, dst, cmp_code);
+            cv::cuda::GpuMat dst;
+            cv::cuda::compare(loadMat(src), sc, dst, cmp_code);
         }
         catch (const cv::Exception& e)
         {
@@ -1984,9 +1984,9 @@ GPU_TEST_P(Compare_Scalar, Accuracy)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, CV_MAKE_TYPE(CV_8U, src.channels()), useRoi);
+        cv::cuda::GpuMat dst = createMat(size, CV_MAKE_TYPE(CV_8U, src.channels()), useRoi);
 
-        cv::gpu::compare(loadMat(src, useRoi), sc, dst, cmp_code);
+        cv::cuda::compare(loadMat(src, useRoi), sc, dst, cmp_code);
 
         cv::Mat dst_gold;
         compareScalarGold(src, sc, dst_gold, cmp_code);
@@ -2005,9 +2005,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Compare_Scalar, testing::Combine(
 //////////////////////////////////////////////////////////////////////////////
 // Bitwise_Array
 
-PARAM_TEST_CASE(Bitwise_Array, cv::gpu::DeviceInfo, cv::Size, MatType)
+PARAM_TEST_CASE(Bitwise_Array, cv::cuda::DeviceInfo, cv::Size, MatType)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
 
@@ -2020,7 +2020,7 @@ PARAM_TEST_CASE(Bitwise_Array, cv::gpu::DeviceInfo, cv::Size, MatType)
         size = GET_PARAM(1);
         type = GET_PARAM(2);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         src1 = randomMat(size, type, 0.0, std::numeric_limits<int>::max());
         src2 = randomMat(size, type, 0.0, std::numeric_limits<int>::max());
@@ -2029,8 +2029,8 @@ PARAM_TEST_CASE(Bitwise_Array, cv::gpu::DeviceInfo, cv::Size, MatType)
 
 GPU_TEST_P(Bitwise_Array, Not)
 {
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_not(loadMat(src1), dst);
+    cv::cuda::GpuMat dst;
+    cv::cuda::bitwise_not(loadMat(src1), dst);
 
     cv::Mat dst_gold = ~src1;
 
@@ -2039,8 +2039,8 @@ GPU_TEST_P(Bitwise_Array, Not)
 
 GPU_TEST_P(Bitwise_Array, Or)
 {
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_or(loadMat(src1), loadMat(src2), dst);
+    cv::cuda::GpuMat dst;
+    cv::cuda::bitwise_or(loadMat(src1), loadMat(src2), dst);
 
     cv::Mat dst_gold = src1 | src2;
 
@@ -2049,8 +2049,8 @@ GPU_TEST_P(Bitwise_Array, Or)
 
 GPU_TEST_P(Bitwise_Array, And)
 {
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_and(loadMat(src1), loadMat(src2), dst);
+    cv::cuda::GpuMat dst;
+    cv::cuda::bitwise_and(loadMat(src1), loadMat(src2), dst);
 
     cv::Mat dst_gold = src1 & src2;
 
@@ -2059,8 +2059,8 @@ GPU_TEST_P(Bitwise_Array, And)
 
 GPU_TEST_P(Bitwise_Array, Xor)
 {
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_xor(loadMat(src1), loadMat(src2), dst);
+    cv::cuda::GpuMat dst;
+    cv::cuda::bitwise_xor(loadMat(src1), loadMat(src2), dst);
 
     cv::Mat dst_gold = src1 ^ src2;
 
@@ -2075,9 +2075,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Bitwise_Array, testing::Combine(
 //////////////////////////////////////////////////////////////////////////////
 // Bitwise_Scalar
 
-PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels)
+PARAM_TEST_CASE(Bitwise_Scalar, cv::cuda::DeviceInfo, cv::Size, MatDepth, Channels)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     int channels;
@@ -2092,7 +2092,7 @@ PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channel
         depth = GET_PARAM(2);
         channels = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         src = randomMat(size, CV_MAKE_TYPE(depth, channels));
         cv::Scalar_<int> ival = randomScalar(0.0, std::numeric_limits<int>::max());
@@ -2102,8 +2102,8 @@ PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channel
 
 GPU_TEST_P(Bitwise_Scalar, Or)
 {
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_or(loadMat(src), val, dst);
+    cv::cuda::GpuMat dst;
+    cv::cuda::bitwise_or(loadMat(src), val, dst);
 
     cv::Mat dst_gold;
     cv::bitwise_or(src, val, dst_gold);
@@ -2113,8 +2113,8 @@ GPU_TEST_P(Bitwise_Scalar, Or)
 
 GPU_TEST_P(Bitwise_Scalar, And)
 {
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_and(loadMat(src), val, dst);
+    cv::cuda::GpuMat dst;
+    cv::cuda::bitwise_and(loadMat(src), val, dst);
 
     cv::Mat dst_gold;
     cv::bitwise_and(src, val, dst_gold);
@@ -2124,8 +2124,8 @@ GPU_TEST_P(Bitwise_Scalar, And)
 
 GPU_TEST_P(Bitwise_Scalar, Xor)
 {
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_xor(loadMat(src), val, dst);
+    cv::cuda::GpuMat dst;
+    cv::cuda::bitwise_xor(loadMat(src), val, dst);
 
     cv::Mat dst_gold;
     cv::bitwise_xor(src, val, dst_gold);
@@ -2173,9 +2173,9 @@ namespace
     }
 }
 
-PARAM_TEST_CASE(RShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
+PARAM_TEST_CASE(RShift, cv::cuda::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     int channels;
@@ -2189,7 +2189,7 @@ PARAM_TEST_CASE(RShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRo
         channels = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -2199,8 +2199,8 @@ GPU_TEST_P(RShift, Accuracy)
     cv::Mat src = randomMat(size, type);
     cv::Scalar_<int> val = randomScalar(0.0, 8.0);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-    cv::gpu::rshift(loadMat(src, useRoi), val, dst);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::rshift(loadMat(src, useRoi), val, dst);
 
     cv::Mat dst_gold;
     rhiftGold(src, val, dst_gold);
@@ -2253,9 +2253,9 @@ namespace
     }
 }
 
-PARAM_TEST_CASE(LShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
+PARAM_TEST_CASE(LShift, cv::cuda::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     int channels;
@@ -2269,7 +2269,7 @@ PARAM_TEST_CASE(LShift, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRo
         channels = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -2279,8 +2279,8 @@ GPU_TEST_P(LShift, Accuracy)
     cv::Mat src = randomMat(size, type);
     cv::Scalar_<int> val = randomScalar(0.0, 8.0);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-    cv::gpu::lshift(loadMat(src, useRoi), val, dst);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::lshift(loadMat(src, useRoi), val, dst);
 
     cv::Mat dst_gold;
     lhiftGold(src, val, dst_gold);
@@ -2298,9 +2298,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, LShift, testing::Combine(
 //////////////////////////////////////////////////////////////////////////////
 // Min
 
-PARAM_TEST_CASE(Min, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(Min, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -2312,7 +2312,7 @@ PARAM_TEST_CASE(Min, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -2321,12 +2321,12 @@ GPU_TEST_P(Min, Array)
     cv::Mat src1 = randomMat(size, depth);
     cv::Mat src2 = randomMat(size, depth);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::min(loadMat(src1), loadMat(src2), dst);
+            cv::cuda::GpuMat dst;
+            cv::cuda::min(loadMat(src1), loadMat(src2), dst);
         }
         catch (const cv::Exception& e)
         {
@@ -2335,8 +2335,8 @@ GPU_TEST_P(Min, Array)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::min(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
+        cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+        cv::cuda::min(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
 
         cv::Mat dst_gold = cv::min(src1, src2);
 
@@ -2349,12 +2349,12 @@ GPU_TEST_P(Min, Scalar)
     cv::Mat src = randomMat(size, depth);
     double val = randomDouble(0.0, 255.0);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::min(loadMat(src), val, dst);
+            cv::cuda::GpuMat dst;
+            cv::cuda::min(loadMat(src), val, dst);
         }
         catch (const cv::Exception& e)
         {
@@ -2363,8 +2363,8 @@ GPU_TEST_P(Min, Scalar)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::min(loadMat(src, useRoi), val, dst);
+        cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+        cv::cuda::min(loadMat(src, useRoi), val, dst);
 
         cv::Mat dst_gold = cv::min(src, val);
 
@@ -2381,9 +2381,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Min, testing::Combine(
 //////////////////////////////////////////////////////////////////////////////
 // Max
 
-PARAM_TEST_CASE(Max, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(Max, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -2395,7 +2395,7 @@ PARAM_TEST_CASE(Max, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -2404,12 +2404,12 @@ GPU_TEST_P(Max, Array)
     cv::Mat src1 = randomMat(size, depth);
     cv::Mat src2 = randomMat(size, depth);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::max(loadMat(src1), loadMat(src2), dst);
+            cv::cuda::GpuMat dst;
+            cv::cuda::max(loadMat(src1), loadMat(src2), dst);
         }
         catch (const cv::Exception& e)
         {
@@ -2418,8 +2418,8 @@ GPU_TEST_P(Max, Array)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::max(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
+        cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+        cv::cuda::max(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
 
         cv::Mat dst_gold = cv::max(src1, src2);
 
@@ -2432,12 +2432,12 @@ GPU_TEST_P(Max, Scalar)
     cv::Mat src = randomMat(size, depth);
     double val = randomDouble(0.0, 255.0);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::max(loadMat(src), val, dst);
+            cv::cuda::GpuMat dst;
+            cv::cuda::max(loadMat(src), val, dst);
         }
         catch (const cv::Exception& e)
         {
@@ -2446,8 +2446,8 @@ GPU_TEST_P(Max, Scalar)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
-        cv::gpu::max(loadMat(src, useRoi), val, dst);
+        cv::cuda::GpuMat dst = createMat(size, depth, useRoi);
+        cv::cuda::max(loadMat(src, useRoi), val, dst);
 
         cv::Mat dst_gold = cv::max(src, val);
 
@@ -2464,9 +2464,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Max, testing::Combine(
 //////////////////////////////////////////////////////////////////////////////
 // AddWeighted
 
-PARAM_TEST_CASE(AddWeighted, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth, MatDepth, UseRoi)
+PARAM_TEST_CASE(AddWeighted, cv::cuda::DeviceInfo, cv::Size, MatDepth, MatDepth, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth1;
     int depth2;
@@ -2482,7 +2482,7 @@ PARAM_TEST_CASE(AddWeighted, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth,
         dst_depth = GET_PARAM(4);
         useRoi = GET_PARAM(5);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -2494,12 +2494,12 @@ GPU_TEST_P(AddWeighted, Accuracy)
     double beta = randomDouble(-10.0, 10.0);
     double gamma = randomDouble(-10.0, 10.0);
 
-    if ((depth1 == CV_64F || depth2 == CV_64F || dst_depth == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if ((depth1 == CV_64F || depth2 == CV_64F || dst_depth == CV_64F) && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::GpuMat dst;
-            cv::gpu::addWeighted(loadMat(src1), alpha, loadMat(src2), beta, gamma, dst, dst_depth);
+            cv::cuda::GpuMat dst;
+            cv::cuda::addWeighted(loadMat(src1), alpha, loadMat(src2), beta, gamma, dst, dst_depth);
         }
         catch (const cv::Exception& e)
         {
@@ -2508,8 +2508,8 @@ GPU_TEST_P(AddWeighted, Accuracy)
     }
     else
     {
-        cv::gpu::GpuMat dst = createMat(size, dst_depth, useRoi);
-        cv::gpu::addWeighted(loadMat(src1, useRoi), alpha, loadMat(src2, useRoi), beta, gamma, dst, dst_depth);
+        cv::cuda::GpuMat dst = createMat(size, dst_depth, useRoi);
+        cv::cuda::addWeighted(loadMat(src1, useRoi), alpha, loadMat(src2, useRoi), beta, gamma, dst, dst_depth);
 
         cv::Mat dst_gold;
         cv::addWeighted(src1, alpha, src2, beta, gamma, dst_gold, dst_depth);
@@ -2532,9 +2532,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, AddWeighted, testing::Combine(
 CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV)
 #define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))
 
-PARAM_TEST_CASE(Threshold, cv::gpu::DeviceInfo, cv::Size, MatType, ThreshOp, UseRoi)
+PARAM_TEST_CASE(Threshold, cv::cuda::DeviceInfo, cv::Size, MatType, ThreshOp, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     int threshOp;
@@ -2548,7 +2548,7 @@ PARAM_TEST_CASE(Threshold, cv::gpu::DeviceInfo, cv::Size, MatType, ThreshOp, Use
         threshOp = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -2558,8 +2558,8 @@ GPU_TEST_P(Threshold, Accuracy)
     double maxVal = randomDouble(20.0, 127.0);
     double thresh = randomDouble(0.0, maxVal);
 
-    cv::gpu::GpuMat dst = createMat(src.size(), src.type(), useRoi);
-    cv::gpu::threshold(loadMat(src, useRoi), dst, thresh, maxVal, threshOp);
+    cv::cuda::GpuMat dst = createMat(src.size(), src.type(), useRoi);
+    cv::cuda::threshold(loadMat(src, useRoi), dst, thresh, maxVal, threshOp);
 
     cv::Mat dst_gold;
     cv::threshold(src, dst_gold, thresh, maxVal, threshOp);
@@ -2577,9 +2577,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Threshold, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // Magnitude
 
-PARAM_TEST_CASE(Magnitude, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+PARAM_TEST_CASE(Magnitude, cv::cuda::DeviceInfo, cv::Size, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     bool useRoi;
 
@@ -2589,7 +2589,7 @@ PARAM_TEST_CASE(Magnitude, cv::gpu::DeviceInfo, cv::Size, UseRoi)
         size = GET_PARAM(1);
         useRoi = GET_PARAM(2);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -2597,8 +2597,8 @@ GPU_TEST_P(Magnitude, NPP)
 {
     cv::Mat src = randomMat(size, CV_32FC2);
 
-    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::magnitude(loadMat(src, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, CV_32FC1, useRoi);
+    cv::cuda::magnitude(loadMat(src, useRoi), dst);
 
     cv::Mat arr[2];
     cv::split(src, arr);
@@ -2612,8 +2612,8 @@ GPU_TEST_P(Magnitude, Sqr_NPP)
 {
     cv::Mat src = randomMat(size, CV_32FC2);
 
-    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::magnitudeSqr(loadMat(src, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, CV_32FC1, useRoi);
+    cv::cuda::magnitudeSqr(loadMat(src, useRoi), dst);
 
     cv::Mat arr[2];
     cv::split(src, arr);
@@ -2629,8 +2629,8 @@ GPU_TEST_P(Magnitude, Accuracy)
     cv::Mat x = randomMat(size, CV_32FC1);
     cv::Mat y = randomMat(size, CV_32FC1);
 
-    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::magnitude(loadMat(x, useRoi), loadMat(y, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, CV_32FC1, useRoi);
+    cv::cuda::magnitude(loadMat(x, useRoi), loadMat(y, useRoi), dst);
 
     cv::Mat dst_gold;
     cv::magnitude(x, y, dst_gold);
@@ -2643,8 +2643,8 @@ GPU_TEST_P(Magnitude, Sqr_Accuracy)
     cv::Mat x = randomMat(size, CV_32FC1);
     cv::Mat y = randomMat(size, CV_32FC1);
 
-    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::magnitudeSqr(loadMat(x, useRoi), loadMat(y, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(size, CV_32FC1, useRoi);
+    cv::cuda::magnitudeSqr(loadMat(x, useRoi), loadMat(y, useRoi), dst);
 
     cv::Mat dst_gold;
     cv::magnitude(x, y, dst_gold);
@@ -2666,9 +2666,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(AngleInDegrees, bool)
 }
 
-PARAM_TEST_CASE(Phase, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
+PARAM_TEST_CASE(Phase, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     bool angleInDegrees;
     bool useRoi;
@@ -2680,7 +2680,7 @@ PARAM_TEST_CASE(Phase, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
         angleInDegrees = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -2689,8 +2689,8 @@ GPU_TEST_P(Phase, Accuracy)
     cv::Mat x = randomMat(size, CV_32FC1);
     cv::Mat y = randomMat(size, CV_32FC1);
 
-    cv::gpu::GpuMat dst = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::phase(loadMat(x, useRoi), loadMat(y, useRoi), dst, angleInDegrees);
+    cv::cuda::GpuMat dst = createMat(size, CV_32FC1, useRoi);
+    cv::cuda::phase(loadMat(x, useRoi), loadMat(y, useRoi), dst, angleInDegrees);
 
     cv::Mat dst_gold;
     cv::phase(x, y, dst_gold, angleInDegrees);
@@ -2707,9 +2707,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Phase, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // CartToPolar
 
-PARAM_TEST_CASE(CartToPolar, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
+PARAM_TEST_CASE(CartToPolar, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     bool angleInDegrees;
     bool useRoi;
@@ -2721,7 +2721,7 @@ PARAM_TEST_CASE(CartToPolar, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseR
         angleInDegrees = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -2730,9 +2730,9 @@ GPU_TEST_P(CartToPolar, Accuracy)
     cv::Mat x = randomMat(size, CV_32FC1);
     cv::Mat y = randomMat(size, CV_32FC1);
 
-    cv::gpu::GpuMat mag = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::GpuMat angle = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::cartToPolar(loadMat(x, useRoi), loadMat(y, useRoi), mag, angle, angleInDegrees);
+    cv::cuda::GpuMat mag = createMat(size, CV_32FC1, useRoi);
+    cv::cuda::GpuMat angle = createMat(size, CV_32FC1, useRoi);
+    cv::cuda::cartToPolar(loadMat(x, useRoi), loadMat(y, useRoi), mag, angle, angleInDegrees);
 
     cv::Mat mag_gold;
     cv::Mat angle_gold;
@@ -2751,9 +2751,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, CartToPolar, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // polarToCart
 
-PARAM_TEST_CASE(PolarToCart, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
+PARAM_TEST_CASE(PolarToCart, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     bool angleInDegrees;
     bool useRoi;
@@ -2765,7 +2765,7 @@ PARAM_TEST_CASE(PolarToCart, cv::gpu::DeviceInfo, cv::Size, AngleInDegrees, UseR
         angleInDegrees = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -2774,9 +2774,9 @@ GPU_TEST_P(PolarToCart, Accuracy)
     cv::Mat magnitude = randomMat(size, CV_32FC1);
     cv::Mat angle = randomMat(size, CV_32FC1);
 
-    cv::gpu::GpuMat x = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::GpuMat y = createMat(size, CV_32FC1, useRoi);
-    cv::gpu::polarToCart(loadMat(magnitude, useRoi), loadMat(angle, useRoi), x, y, angleInDegrees);
+    cv::cuda::GpuMat x = createMat(size, CV_32FC1, useRoi);
+    cv::cuda::GpuMat y = createMat(size, CV_32FC1, useRoi);
+    cv::cuda::polarToCart(loadMat(magnitude, useRoi), loadMat(angle, useRoi), x, y, angleInDegrees);
 
     cv::Mat x_gold;
     cv::Mat y_gold;
diff --git a/modules/gpuarithm/test/test_reductions.cpp b/modules/gpuarithm/test/test_reductions.cpp
index cd25ba72f2..9951fce448 100644
--- a/modules/gpuarithm/test/test_reductions.cpp
+++ b/modules/gpuarithm/test/test_reductions.cpp
@@ -49,9 +49,9 @@ using namespace cvtest;
 ////////////////////////////////////////////////////////////////////////////////
 // Norm
 
-PARAM_TEST_CASE(Norm, cv::gpu::DeviceInfo, cv::Size, MatDepth, NormCode, UseRoi)
+PARAM_TEST_CASE(Norm, cv::cuda::DeviceInfo, cv::Size, MatDepth, NormCode, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     int normCode;
@@ -65,7 +65,7 @@ PARAM_TEST_CASE(Norm, cv::gpu::DeviceInfo, cv::Size, MatDepth, NormCode, UseRoi)
         normCode = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -74,8 +74,8 @@ GPU_TEST_P(Norm, Accuracy)
     cv::Mat src = randomMat(size, depth);
     cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
 
-    cv::gpu::GpuMat d_buf;
-    double val = cv::gpu::norm(loadMat(src, useRoi), normCode, loadMat(mask, useRoi), d_buf);
+    cv::cuda::GpuMat d_buf;
+    double val = cv::cuda::norm(loadMat(src, useRoi), normCode, loadMat(mask, useRoi), d_buf);
 
     double val_gold = cv::norm(src, normCode, mask);
 
@@ -97,9 +97,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Norm, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // normDiff
 
-PARAM_TEST_CASE(NormDiff, cv::gpu::DeviceInfo, cv::Size, NormCode, UseRoi)
+PARAM_TEST_CASE(NormDiff, cv::cuda::DeviceInfo, cv::Size, NormCode, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int normCode;
     bool useRoi;
@@ -111,7 +111,7 @@ PARAM_TEST_CASE(NormDiff, cv::gpu::DeviceInfo, cv::Size, NormCode, UseRoi)
         normCode = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -120,7 +120,7 @@ GPU_TEST_P(NormDiff, Accuracy)
     cv::Mat src1 = randomMat(size, CV_8UC1);
     cv::Mat src2 = randomMat(size, CV_8UC1);
 
-    double val = cv::gpu::norm(loadMat(src1, useRoi), loadMat(src2, useRoi), normCode);
+    double val = cv::cuda::norm(loadMat(src1, useRoi), loadMat(src2, useRoi), normCode);
 
     double val_gold = cv::norm(src1, src2, normCode);
 
@@ -216,9 +216,9 @@ namespace
     }
 }
 
-PARAM_TEST_CASE(Sum, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
+PARAM_TEST_CASE(Sum, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     bool useRoi;
@@ -232,7 +232,7 @@ PARAM_TEST_CASE(Sum, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
         type = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         src = randomMat(size, type, -128.0, 128.0);
     }
@@ -240,7 +240,7 @@ PARAM_TEST_CASE(Sum, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
 
 GPU_TEST_P(Sum, Simple)
 {
-    cv::Scalar val = cv::gpu::sum(loadMat(src, useRoi));
+    cv::Scalar val = cv::cuda::sum(loadMat(src, useRoi));
 
     cv::Scalar val_gold = cv::sum(src);
 
@@ -249,7 +249,7 @@ GPU_TEST_P(Sum, Simple)
 
 GPU_TEST_P(Sum, Abs)
 {
-    cv::Scalar val = cv::gpu::absSum(loadMat(src, useRoi));
+    cv::Scalar val = cv::cuda::absSum(loadMat(src, useRoi));
 
     cv::Scalar val_gold = absSumGold(src);
 
@@ -258,7 +258,7 @@ GPU_TEST_P(Sum, Abs)
 
 GPU_TEST_P(Sum, Sqr)
 {
-    cv::Scalar val = cv::gpu::sqrSum(loadMat(src, useRoi));
+    cv::Scalar val = cv::cuda::sqrSum(loadMat(src, useRoi));
 
     cv::Scalar val_gold = sqrSumGold(src);
 
@@ -274,9 +274,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Sum, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // MinMax
 
-PARAM_TEST_CASE(MinMax, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(MinMax, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -288,7 +288,7 @@ PARAM_TEST_CASE(MinMax, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -296,12 +296,12 @@ GPU_TEST_P(MinMax, WithoutMask)
 {
     cv::Mat src = randomMat(size, depth);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
             double minVal, maxVal;
-            cv::gpu::minMax(loadMat(src), &minVal, &maxVal);
+            cv::cuda::minMax(loadMat(src), &minVal, &maxVal);
         }
         catch (const cv::Exception& e)
         {
@@ -311,7 +311,7 @@ GPU_TEST_P(MinMax, WithoutMask)
     else
     {
         double minVal, maxVal;
-        cv::gpu::minMax(loadMat(src, useRoi), &minVal, &maxVal);
+        cv::cuda::minMax(loadMat(src, useRoi), &minVal, &maxVal);
 
         double minVal_gold, maxVal_gold;
         minMaxLocGold(src, &minVal_gold, &maxVal_gold);
@@ -326,12 +326,12 @@ GPU_TEST_P(MinMax, WithMask)
     cv::Mat src = randomMat(size, depth);
     cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
             double minVal, maxVal;
-            cv::gpu::minMax(loadMat(src), &minVal, &maxVal, loadMat(mask));
+            cv::cuda::minMax(loadMat(src), &minVal, &maxVal, loadMat(mask));
         }
         catch (const cv::Exception& e)
         {
@@ -341,7 +341,7 @@ GPU_TEST_P(MinMax, WithMask)
     else
     {
         double minVal, maxVal;
-        cv::gpu::minMax(loadMat(src, useRoi), &minVal, &maxVal, loadMat(mask, useRoi));
+        cv::cuda::minMax(loadMat(src, useRoi), &minVal, &maxVal, loadMat(mask, useRoi));
 
         double minVal_gold, maxVal_gold;
         minMaxLocGold(src, &minVal_gold, &maxVal_gold, 0, 0, mask);
@@ -355,13 +355,13 @@ GPU_TEST_P(MinMax, NullPtr)
 {
     cv::Mat src = randomMat(size, depth);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
             double minVal, maxVal;
-            cv::gpu::minMax(loadMat(src), &minVal, 0);
-            cv::gpu::minMax(loadMat(src), 0, &maxVal);
+            cv::cuda::minMax(loadMat(src), &minVal, 0);
+            cv::cuda::minMax(loadMat(src), 0, &maxVal);
         }
         catch (const cv::Exception& e)
         {
@@ -371,8 +371,8 @@ GPU_TEST_P(MinMax, NullPtr)
     else
     {
         double minVal, maxVal;
-        cv::gpu::minMax(loadMat(src, useRoi), &minVal, 0);
-        cv::gpu::minMax(loadMat(src, useRoi), 0, &maxVal);
+        cv::cuda::minMax(loadMat(src, useRoi), &minVal, 0);
+        cv::cuda::minMax(loadMat(src, useRoi), 0, &maxVal);
 
         double minVal_gold, maxVal_gold;
         minMaxLocGold(src, &minVal_gold, &maxVal_gold, 0, 0);
@@ -418,9 +418,9 @@ namespace
     }
 }
 
-PARAM_TEST_CASE(MinMaxLoc, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(MinMaxLoc, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -432,7 +432,7 @@ PARAM_TEST_CASE(MinMaxLoc, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -440,13 +440,13 @@ GPU_TEST_P(MinMaxLoc, WithoutMask)
 {
     cv::Mat src = randomMat(size, depth);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
             double minVal, maxVal;
             cv::Point minLoc, maxLoc;
-            cv::gpu::minMaxLoc(loadMat(src), &minVal, &maxVal, &minLoc, &maxLoc);
+            cv::cuda::minMaxLoc(loadMat(src), &minVal, &maxVal, &minLoc, &maxLoc);
         }
         catch (const cv::Exception& e)
         {
@@ -457,7 +457,7 @@ GPU_TEST_P(MinMaxLoc, WithoutMask)
     {
         double minVal, maxVal;
         cv::Point minLoc, maxLoc;
-        cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc);
+        cv::cuda::minMaxLoc(loadMat(src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc);
 
         double minVal_gold, maxVal_gold;
         cv::Point minLoc_gold, maxLoc_gold;
@@ -476,13 +476,13 @@ GPU_TEST_P(MinMaxLoc, WithMask)
     cv::Mat src = randomMat(size, depth);
     cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
             double minVal, maxVal;
             cv::Point minLoc, maxLoc;
-            cv::gpu::minMaxLoc(loadMat(src), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(mask));
+            cv::cuda::minMaxLoc(loadMat(src), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(mask));
         }
         catch (const cv::Exception& e)
         {
@@ -493,7 +493,7 @@ GPU_TEST_P(MinMaxLoc, WithMask)
     {
         double minVal, maxVal;
         cv::Point minLoc, maxLoc;
-        cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(mask, useRoi));
+        cv::cuda::minMaxLoc(loadMat(src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(mask, useRoi));
 
         double minVal_gold, maxVal_gold;
         cv::Point minLoc_gold, maxLoc_gold;
@@ -511,16 +511,16 @@ GPU_TEST_P(MinMaxLoc, NullPtr)
 {
     cv::Mat src = randomMat(size, depth);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
             double minVal, maxVal;
             cv::Point minLoc, maxLoc;
-            cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, 0, 0, 0);
-            cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, &maxVal, 0, 0);
-            cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, &minLoc, 0);
-            cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, 0, &maxLoc);
+            cv::cuda::minMaxLoc(loadMat(src, useRoi), &minVal, 0, 0, 0);
+            cv::cuda::minMaxLoc(loadMat(src, useRoi), 0, &maxVal, 0, 0);
+            cv::cuda::minMaxLoc(loadMat(src, useRoi), 0, 0, &minLoc, 0);
+            cv::cuda::minMaxLoc(loadMat(src, useRoi), 0, 0, 0, &maxLoc);
         }
         catch (const cv::Exception& e)
         {
@@ -531,10 +531,10 @@ GPU_TEST_P(MinMaxLoc, NullPtr)
     {
         double minVal, maxVal;
         cv::Point minLoc, maxLoc;
-        cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, 0, 0, 0);
-        cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, &maxVal, 0, 0);
-        cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, &minLoc, 0);
-        cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, 0, &maxLoc);
+        cv::cuda::minMaxLoc(loadMat(src, useRoi), &minVal, 0, 0, 0);
+        cv::cuda::minMaxLoc(loadMat(src, useRoi), 0, &maxVal, 0, 0);
+        cv::cuda::minMaxLoc(loadMat(src, useRoi), 0, 0, &minLoc, 0);
+        cv::cuda::minMaxLoc(loadMat(src, useRoi), 0, 0, 0, &maxLoc);
 
         double minVal_gold, maxVal_gold;
         cv::Point minLoc_gold, maxLoc_gold;
@@ -557,9 +557,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, MinMaxLoc, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////
 // CountNonZero
 
-PARAM_TEST_CASE(CountNonZero, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(CountNonZero, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -572,7 +572,7 @@ PARAM_TEST_CASE(CountNonZero, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -582,11 +582,11 @@ GPU_TEST_P(CountNonZero, Accuracy)
     cv::Mat src;
     srcBase.convertTo(src, depth);
 
-    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
+    if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
         {
-            cv::gpu::countNonZero(loadMat(src));
+            cv::cuda::countNonZero(loadMat(src));
         }
         catch (const cv::Exception& e)
         {
@@ -595,7 +595,7 @@ GPU_TEST_P(CountNonZero, Accuracy)
     }
     else
     {
-        int val = cv::gpu::countNonZero(loadMat(src, useRoi));
+        int val = cv::cuda::countNonZero(loadMat(src, useRoi));
 
         int val_gold = cv::countNonZero(src);
 
@@ -615,9 +615,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, CountNonZero, testing::Combine(
 CV_ENUM(ReduceCode, cv::REDUCE_SUM, cv::REDUCE_AVG, cv::REDUCE_MAX, cv::REDUCE_MIN)
 #define ALL_REDUCE_CODES testing::Values(ReduceCode(cv::REDUCE_SUM), ReduceCode(cv::REDUCE_AVG), ReduceCode(cv::REDUCE_MAX), ReduceCode(cv::REDUCE_MIN))
 
-PARAM_TEST_CASE(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, ReduceCode, UseRoi)
+PARAM_TEST_CASE(Reduce, cv::cuda::DeviceInfo, cv::Size, MatDepth, Channels, ReduceCode, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     int channels;
@@ -637,7 +637,7 @@ PARAM_TEST_CASE(Reduce, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, Reduc
         reduceOp = GET_PARAM(4);
         useRoi = GET_PARAM(5);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         type = CV_MAKE_TYPE(depth, channels);
 
@@ -657,8 +657,8 @@ GPU_TEST_P(Reduce, Rows)
 {
     cv::Mat src = randomMat(size, type);
 
-    cv::gpu::GpuMat dst = createMat(cv::Size(src.cols, 1), dst_type, useRoi);
-    cv::gpu::reduce(loadMat(src, useRoi), dst, 0, reduceOp, dst_depth);
+    cv::cuda::GpuMat dst = createMat(cv::Size(src.cols, 1), dst_type, useRoi);
+    cv::cuda::reduce(loadMat(src, useRoi), dst, 0, reduceOp, dst_depth);
 
     cv::Mat dst_gold;
     cv::reduce(src, dst_gold, 0, reduceOp, dst_depth);
@@ -670,8 +670,8 @@ GPU_TEST_P(Reduce, Cols)
 {
     cv::Mat src = randomMat(size, type);
 
-    cv::gpu::GpuMat dst = createMat(cv::Size(src.rows, 1), dst_type, useRoi);
-    cv::gpu::reduce(loadMat(src, useRoi), dst, 1, reduceOp, dst_depth);
+    cv::cuda::GpuMat dst = createMat(cv::Size(src.rows, 1), dst_type, useRoi);
+    cv::cuda::reduce(loadMat(src, useRoi), dst, 1, reduceOp, dst_depth);
 
     cv::Mat dst_gold;
     cv::reduce(src, dst_gold, 1, reduceOp, dst_depth);
@@ -697,9 +697,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Reduce, testing::Combine(
 //////////////////////////////////////////////////////////////////////////////
 // Normalize
 
-PARAM_TEST_CASE(Normalize, cv::gpu::DeviceInfo, cv::Size, MatDepth, NormCode, UseRoi)
+PARAM_TEST_CASE(Normalize, cv::cuda::DeviceInfo, cv::Size, MatDepth, NormCode, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     int norm_type;
@@ -716,7 +716,7 @@ PARAM_TEST_CASE(Normalize, cv::gpu::DeviceInfo, cv::Size, MatDepth, NormCode, Us
         norm_type = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         alpha = 1;
         beta = 0;
@@ -728,8 +728,8 @@ GPU_TEST_P(Normalize, WithOutMask)
 {
     cv::Mat src = randomMat(size, type);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-    cv::gpu::normalize(loadMat(src, useRoi), dst, alpha, beta, norm_type, type);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::normalize(loadMat(src, useRoi), dst, alpha, beta, norm_type, type);
 
     cv::Mat dst_gold;
     cv::normalize(src, dst_gold, alpha, beta, norm_type, type);
@@ -742,9 +742,9 @@ GPU_TEST_P(Normalize, WithMask)
     cv::Mat src = randomMat(size, type);
     cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
     dst.setTo(cv::Scalar::all(0));
-    cv::gpu::normalize(loadMat(src, useRoi), dst, alpha, beta, norm_type, type, loadMat(mask, useRoi));
+    cv::cuda::normalize(loadMat(src, useRoi), dst, alpha, beta, norm_type, type, loadMat(mask, useRoi));
 
     cv::Mat dst_gold(size, type);
     dst_gold.setTo(cv::Scalar::all(0));
@@ -763,9 +763,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Normalize, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // MeanStdDev
 
-PARAM_TEST_CASE(MeanStdDev, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+PARAM_TEST_CASE(MeanStdDev, cv::cuda::DeviceInfo, cv::Size, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     bool useRoi;
 
@@ -775,7 +775,7 @@ PARAM_TEST_CASE(MeanStdDev, cv::gpu::DeviceInfo, cv::Size, UseRoi)
         size = GET_PARAM(1);
         useRoi = GET_PARAM(2);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -783,13 +783,13 @@ GPU_TEST_P(MeanStdDev, Accuracy)
 {
     cv::Mat src = randomMat(size, CV_8UC1);
 
-    if (!supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_13))
+    if (!supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_13))
     {
         try
         {
             cv::Scalar mean;
             cv::Scalar stddev;
-            cv::gpu::meanStdDev(loadMat(src, useRoi), mean, stddev);
+            cv::cuda::meanStdDev(loadMat(src, useRoi), mean, stddev);
         }
         catch (const cv::Exception& e)
         {
@@ -800,7 +800,7 @@ GPU_TEST_P(MeanStdDev, Accuracy)
     {
         cv::Scalar mean;
         cv::Scalar stddev;
-        cv::gpu::meanStdDev(loadMat(src, useRoi), mean, stddev);
+        cv::cuda::meanStdDev(loadMat(src, useRoi), mean, stddev);
 
         cv::Scalar mean_gold;
         cv::Scalar stddev_gold;
diff --git a/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp b/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
index c6d9223ddd..451ba4ecbd 100644
--- a/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
+++ b/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp
@@ -50,7 +50,7 @@
 #include "opencv2/core/gpu.hpp"
 #include "opencv2/video/background_segm.hpp"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 ////////////////////////////////////////////////////
 // MOG
@@ -66,7 +66,7 @@ public:
     virtual void getBackgroundImage(OutputArray backgroundImage, Stream& stream) const = 0;
 };
 
-CV_EXPORTS Ptr<gpu::BackgroundSubtractorMOG>
+CV_EXPORTS Ptr<cuda::BackgroundSubtractorMOG>
     createBackgroundSubtractorMOG(int history = 200, int nmixtures = 5,
                                   double backgroundRatio = 0.7, double noiseSigma = 0);
 
@@ -84,7 +84,7 @@ public:
     virtual void getBackgroundImage(OutputArray backgroundImage, Stream& stream) const = 0;
 };
 
-CV_EXPORTS Ptr<gpu::BackgroundSubtractorMOG2>
+CV_EXPORTS Ptr<cuda::BackgroundSubtractorMOG2>
     createBackgroundSubtractorMOG2(int history = 500, double varThreshold = 16,
                                    bool detectShadows = true);
 
@@ -99,7 +99,7 @@ public:
     virtual void apply(InputArray image, OutputArray fgmask, double learningRate, Stream& stream) = 0;
 };
 
-CV_EXPORTS Ptr<gpu::BackgroundSubtractorGMG>
+CV_EXPORTS Ptr<cuda::BackgroundSubtractorGMG>
     createBackgroundSubtractorGMG(int initializationFrames = 120, double decisionThreshold = 0.8);
 
 ////////////////////////////////////////////////////
@@ -144,9 +144,9 @@ struct CV_EXPORTS FGDParams
     FGDParams();
 };
 
-CV_EXPORTS Ptr<gpu::BackgroundSubtractorFGD>
+CV_EXPORTS Ptr<cuda::BackgroundSubtractorFGD>
     createBackgroundSubtractorFGD(const FGDParams& params = FGDParams());
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 #endif /* __OPENCV_GPUBGSEGM_HPP__ */
diff --git a/modules/gpubgsegm/perf/perf_bgsegm.cpp b/modules/gpubgsegm/perf/perf_bgsegm.cpp
index 6f864fd75c..f6b59a3d20 100644
--- a/modules/gpubgsegm/perf/perf_bgsegm.cpp
+++ b/modules/gpubgsegm/perf/perf_bgsegm.cpp
@@ -102,9 +102,9 @@ PERF_TEST_P(Video, FGDStatModel,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_frame(frame), foreground;
+        cv::cuda::GpuMat d_frame(frame), foreground;
 
-        cv::Ptr<cv::gpu::BackgroundSubtractorFGD> d_fgd = cv::gpu::createBackgroundSubtractorFGD();
+        cv::Ptr<cv::cuda::BackgroundSubtractorFGD> d_fgd = cv::cuda::createBackgroundSubtractorFGD();
         d_fgd->apply(d_frame, foreground);
 
         for (int i = 0; i < 10; ++i)
@@ -122,9 +122,9 @@ PERF_TEST_P(Video, FGDStatModel,
         GPU_SANITY_CHECK(foreground, 1e-2, ERROR_RELATIVE);
 
 #ifdef HAVE_OPENCV_GPUIMGPROC
-        cv::gpu::GpuMat background3, background;
+        cv::cuda::GpuMat background3, background;
         d_fgd->getBackgroundImage(background3);
-        cv::gpu::cvtColor(background3, background, cv::COLOR_BGR2BGRA);
+        cv::cuda::cvtColor(background3, background, cv::COLOR_BGR2BGRA);
         GPU_SANITY_CHECK(background, 1e-2, ERROR_RELATIVE);
 #endif
     }
@@ -195,10 +195,10 @@ PERF_TEST_P(Video_Cn_LearningRate, MOG,
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::BackgroundSubtractor> d_mog = cv::gpu::createBackgroundSubtractorMOG();
+        cv::Ptr<cv::BackgroundSubtractor> d_mog = cv::cuda::createBackgroundSubtractorMOG();
 
-        cv::gpu::GpuMat d_frame(frame);
-        cv::gpu::GpuMat foreground;
+        cv::cuda::GpuMat d_frame(frame);
+        cv::cuda::GpuMat foreground;
 
         d_mog->apply(d_frame, foreground, learningRate);
 
@@ -293,11 +293,11 @@ PERF_TEST_P(Video_Cn, MOG2,
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::BackgroundSubtractorMOG2> d_mog2 = cv::gpu::createBackgroundSubtractorMOG2();
+        cv::Ptr<cv::BackgroundSubtractorMOG2> d_mog2 = cv::cuda::createBackgroundSubtractorMOG2();
         d_mog2->setDetectShadows(false);
 
-        cv::gpu::GpuMat d_frame(frame);
-        cv::gpu::GpuMat foreground;
+        cv::cuda::GpuMat d_frame(frame);
+        cv::cuda::GpuMat foreground;
 
         d_mog2->apply(d_frame, foreground);
 
@@ -379,10 +379,10 @@ PERF_TEST_P(Video_Cn, MOG2GetBackgroundImage,
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::BackgroundSubtractor> d_mog2 = cv::gpu::createBackgroundSubtractorMOG2();
+        cv::Ptr<cv::BackgroundSubtractor> d_mog2 = cv::cuda::createBackgroundSubtractorMOG2();
 
-        cv::gpu::GpuMat d_frame;
-        cv::gpu::GpuMat d_foreground;
+        cv::cuda::GpuMat d_frame;
+        cv::cuda::GpuMat d_foreground;
 
         for (int i = 0; i < 10; ++i)
         {
@@ -404,7 +404,7 @@ PERF_TEST_P(Video_Cn, MOG2GetBackgroundImage,
             d_mog2->apply(d_frame, d_foreground);
         }
 
-        cv::gpu::GpuMat background;
+        cv::cuda::GpuMat background;
 
         TEST_CYCLE() d_mog2->getBackgroundImage(background);
 
@@ -478,10 +478,10 @@ PERF_TEST_P(Video_Cn_MaxFeatures, GMG,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat d_frame(frame);
-        cv::gpu::GpuMat foreground;
+        cv::cuda::GpuMat d_frame(frame);
+        cv::cuda::GpuMat foreground;
 
-        cv::Ptr<cv::BackgroundSubtractorGMG> d_gmg = cv::gpu::createBackgroundSubtractorGMG();
+        cv::Ptr<cv::BackgroundSubtractorGMG> d_gmg = cv::cuda::createBackgroundSubtractorGMG();
         d_gmg->setMaxFeatures(maxFeatures);
 
         d_gmg->apply(d_frame, foreground);
diff --git a/modules/gpubgsegm/src/cuda/fgd.cu b/modules/gpubgsegm/src/cuda/fgd.cu
index 7dd616c712..97714c07db 100644
--- a/modules/gpubgsegm/src/cuda/fgd.cu
+++ b/modules/gpubgsegm/src/cuda/fgd.cu
@@ -50,8 +50,8 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "fgd.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace fgd
 {
diff --git a/modules/gpubgsegm/src/cuda/fgd.hpp b/modules/gpubgsegm/src/cuda/fgd.hpp
index 50b9838cdf..07b9135deb 100644
--- a/modules/gpubgsegm/src/cuda/fgd.hpp
+++ b/modules/gpubgsegm/src/cuda/fgd.hpp
@@ -164,24 +164,24 @@ namespace fgd
     const int HISTOGRAM_BIN_COUNT = 256;
 
     template <typename PT, typename CT>
-    void calcDiffHistogram_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame,
+    void calcDiffHistogram_gpu(cv::cuda::PtrStepSzb prevFrame, cv::cuda::PtrStepSzb curFrame,
                                unsigned int* hist0, unsigned int* hist1, unsigned int* hist2,
                                unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2,
                                bool cc20, cudaStream_t stream);
 
     template <typename PT, typename CT>
-    void calcDiffThreshMask_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, uchar3 bestThres, cv::gpu::PtrStepSzb changeMask, cudaStream_t stream);
+    void calcDiffThreshMask_gpu(cv::cuda::PtrStepSzb prevFrame, cv::cuda::PtrStepSzb curFrame, uchar3 bestThres, cv::cuda::PtrStepSzb changeMask, cudaStream_t stream);
 
     void setBGPixelStat(const BGPixelStat& stat);
 
     template <typename PT, typename CT, typename OT>
-    void bgfgClassification_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame,
-                                cv::gpu::PtrStepSzb Ftd, cv::gpu::PtrStepSzb Fbd, cv::gpu::PtrStepSzb foreground,
+    void bgfgClassification_gpu(cv::cuda::PtrStepSzb prevFrame, cv::cuda::PtrStepSzb curFrame,
+                                cv::cuda::PtrStepSzb Ftd, cv::cuda::PtrStepSzb Fbd, cv::cuda::PtrStepSzb foreground,
                                 int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
 
     template <typename PT, typename CT, typename OT>
-    void updateBackgroundModel_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame,
-                                   cv::gpu::PtrStepSzb Ftd, cv::gpu::PtrStepSzb Fbd, cv::gpu::PtrStepSzb foreground, cv::gpu::PtrStepSzb background,
+    void updateBackgroundModel_gpu(cv::cuda::PtrStepSzb prevFrame, cv::cuda::PtrStepSzb curFrame,
+                                   cv::cuda::PtrStepSzb Ftd, cv::cuda::PtrStepSzb Fbd, cv::cuda::PtrStepSzb foreground, cv::cuda::PtrStepSzb background,
                                    int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T,
                                    cudaStream_t stream);
 }
diff --git a/modules/gpubgsegm/src/cuda/gmg.cu b/modules/gpubgsegm/src/cuda/gmg.cu
index 235c1f0e2e..4d07ca9ac5 100644
--- a/modules/gpubgsegm/src/cuda/gmg.cu
+++ b/modules/gpubgsegm/src/cuda/gmg.cu
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/vec_traits.hpp"
 #include "opencv2/core/cuda/limits.hpp"
 
-namespace cv { namespace gpu { namespace cudev {
+namespace cv { namespace cuda { namespace cudev {
     namespace gmg
     {
         __constant__ int   c_width;
diff --git a/modules/gpubgsegm/src/cuda/mog.cu b/modules/gpubgsegm/src/cuda/mog.cu
index ed0720c30c..8ecf83ca85 100644
--- a/modules/gpubgsegm/src/cuda/mog.cu
+++ b/modules/gpubgsegm/src/cuda/mog.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/limits.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace mog
     {
diff --git a/modules/gpubgsegm/src/cuda/mog2.cu b/modules/gpubgsegm/src/cuda/mog2.cu
index 50cb9fa56d..207ad56dc1 100644
--- a/modules/gpubgsegm/src/cuda/mog2.cu
+++ b/modules/gpubgsegm/src/cuda/mog2.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/limits.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace mog2
     {
diff --git a/modules/gpubgsegm/src/fgd.cpp b/modules/gpubgsegm/src/fgd.cpp
index ef203e074f..fd0128c8f3 100644
--- a/modules/gpubgsegm/src/fgd.cpp
+++ b/modules/gpubgsegm/src/fgd.cpp
@@ -43,13 +43,13 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined(HAVE_CUDA) || defined(CUDA_DISABLER) || !defined(HAVE_OPENCV_IMGPROC) || !defined(HAVE_OPENCV_GPUARITHM) || !defined(HAVE_OPENCV_GPUIMGPROC)
 
-cv::gpu::FGDParams::FGDParams() { throw_no_cuda(); }
+cv::cuda::FGDParams::FGDParams() { throw_no_cuda(); }
 
-Ptr<gpu::BackgroundSubtractorFGD> cv::gpu::createBackgroundSubtractorFGD(const FGDParams&) { throw_no_cuda(); return Ptr<gpu::BackgroundSubtractorFGD>(); }
+Ptr<cuda::BackgroundSubtractorFGD> cv::cuda::createBackgroundSubtractorFGD(const FGDParams&) { throw_no_cuda(); return Ptr<cuda::BackgroundSubtractorFGD>(); }
 
 #else
 
@@ -87,7 +87,7 @@ namespace
     const float BGFG_FGD_MINAREA= 15.0f;
 }
 
-cv::gpu::FGDParams::FGDParams()
+cv::cuda::FGDParams::FGDParams()
 {
     Lc      = BGFG_FGD_LC;
     N1c     = BGFG_FGD_N1C;
@@ -122,7 +122,7 @@ namespace
         if (dst_cn < 0)
             dst_cn = src_cn;
 
-        gpu::ensureSizeIsEnough(src.size(), CV_MAKE_TYPE(src.depth(), dst_cn), dst);
+        cuda::ensureSizeIsEnough(src.size(), CV_MAKE_TYPE(src.depth(), dst_cn), dst);
 
         if (src_cn == dst_cn)
         {
@@ -141,7 +141,7 @@ namespace
             const int cvt_code = cvt_codes[src_cn - 1][dst_cn - 1];
             CV_DbgAssert( cvt_code >= 0 );
 
-            gpu::cvtColor(src, dst, cvt_code, dst_cn);
+            cuda::cvtColor(src, dst, cvt_code, dst_cn);
         }
     }
 }
@@ -298,9 +298,9 @@ namespace
                                                                              deltaC, deltaCC, params.alpha2,
                                                                              params.N1c, params.N1cc, 0);
 
-        int count = gpu::countNonZero(foreground, countBuf);
+        int count = cuda::countNonZero(foreground, countBuf);
 
-        gpu::multiply(foreground, Scalar::all(255), foreground);
+        cuda::multiply(foreground, Scalar::all(255), foreground);
 
         return count;
     }
@@ -313,14 +313,14 @@ namespace
 
 namespace
 {
-    void morphology(const GpuMat& src, GpuMat& dst, GpuMat& filterBrd, int brd, Ptr<gpu::Filter>& filter, Scalar brdVal)
+    void morphology(const GpuMat& src, GpuMat& dst, GpuMat& filterBrd, int brd, Ptr<cuda::Filter>& filter, Scalar brdVal)
     {
-        gpu::copyMakeBorder(src, filterBrd, brd, brd, brd, brd, BORDER_CONSTANT, brdVal);
+        cuda::copyMakeBorder(src, filterBrd, brd, brd, brd, brd, BORDER_CONSTANT, brdVal);
         filter->apply(filterBrd(Rect(brd, brd, src.cols, src.rows)), dst);
     }
 
     void smoothForeground(GpuMat& foreground, GpuMat& filterBrd, GpuMat& buf,
-                          Ptr<gpu::Filter>& erodeFilter, Ptr<gpu::Filter>& dilateFilter,
+                          Ptr<cuda::Filter>& erodeFilter, Ptr<cuda::Filter>& dilateFilter,
                           const FGDParams& params)
     {
         const int brd = params.perform_morphing;
@@ -491,37 +491,37 @@ namespace
 
     void BGPixelStat::create(Size size, const FGDParams& params)
     {
-        gpu::ensureSizeIsEnough(size, CV_32FC1, Pbc_);
+        cuda::ensureSizeIsEnough(size, CV_32FC1, Pbc_);
         Pbc_.setTo(Scalar::all(0));
 
-        gpu::ensureSizeIsEnough(size, CV_32FC1, Pbcc_);
+        cuda::ensureSizeIsEnough(size, CV_32FC1, Pbcc_);
         Pbcc_.setTo(Scalar::all(0));
 
-        gpu::ensureSizeIsEnough(size, CV_8UC1, is_trained_st_model_);
+        cuda::ensureSizeIsEnough(size, CV_8UC1, is_trained_st_model_);
         is_trained_st_model_.setTo(Scalar::all(0));
 
-        gpu::ensureSizeIsEnough(size, CV_8UC1, is_trained_dyn_model_);
+        cuda::ensureSizeIsEnough(size, CV_8UC1, is_trained_dyn_model_);
         is_trained_dyn_model_.setTo(Scalar::all(0));
 
-        gpu::ensureSizeIsEnough(params.N2c * size.height, size.width, CV_32FC1, ctable_Pv_);
+        cuda::ensureSizeIsEnough(params.N2c * size.height, size.width, CV_32FC1, ctable_Pv_);
         ctable_Pv_.setTo(Scalar::all(0));
 
-        gpu::ensureSizeIsEnough(params.N2c * size.height, size.width, CV_32FC1, ctable_Pvb_);
+        cuda::ensureSizeIsEnough(params.N2c * size.height, size.width, CV_32FC1, ctable_Pvb_);
         ctable_Pvb_.setTo(Scalar::all(0));
 
-        gpu::ensureSizeIsEnough(params.N2c * size.height, size.width, CV_8UC4, ctable_v_);
+        cuda::ensureSizeIsEnough(params.N2c * size.height, size.width, CV_8UC4, ctable_v_);
         ctable_v_.setTo(Scalar::all(0));
 
-        gpu::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_32FC1, cctable_Pv_);
+        cuda::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_32FC1, cctable_Pv_);
         cctable_Pv_.setTo(Scalar::all(0));
 
-        gpu::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_32FC1, cctable_Pvb_);
+        cuda::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_32FC1, cctable_Pvb_);
         cctable_Pvb_.setTo(Scalar::all(0));
 
-        gpu::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_8UC4, cctable_v1_);
+        cuda::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_8UC4, cctable_v1_);
         cctable_v1_.setTo(Scalar::all(0));
 
-        gpu::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_8UC4, cctable_v2_);
+        cuda::ensureSizeIsEnough(params.N2cc * size.height, size.width, CV_8UC4, cctable_v2_);
         cctable_v2_.setTo(Scalar::all(0));
     }
 
@@ -573,7 +573,7 @@ namespace
         return stat;
     }
 
-    class FGDImpl : public gpu::BackgroundSubtractorFGD
+    class FGDImpl : public cuda::BackgroundSubtractorFGD
     {
     public:
         explicit FGDImpl(const FGDParams& params);
@@ -611,8 +611,8 @@ namespace
         GpuMat filterBrd_;
 
 #ifdef HAVE_OPENCV_GPUFILTERS
-        Ptr<gpu::Filter> dilateFilter_;
-        Ptr<gpu::Filter> erodeFilter_;
+        Ptr<cuda::Filter> dilateFilter_;
+        Ptr<cuda::Filter> erodeFilter_;
 #endif
 
         CvMemStorage* storage_;
@@ -673,7 +673,7 @@ namespace
 
     void FGDImpl::getBackgroundImage(OutputArray backgroundImage) const
     {
-        gpu::cvtColor(background_, backgroundImage, COLOR_BGRA2BGR);
+        cuda::cvtColor(background_, backgroundImage, COLOR_BGRA2BGR);
     }
 
     void FGDImpl::getForegroundRegions(OutputArrayOfArrays dst)
@@ -699,13 +699,13 @@ namespace
 
         frameSize_ = firstFrame.size();
 
-        gpu::ensureSizeIsEnough(firstFrame.size(), CV_8UC1, foreground_);
+        cuda::ensureSizeIsEnough(firstFrame.size(), CV_8UC1, foreground_);
 
         copyChannels(firstFrame, background_, 4);
         copyChannels(firstFrame, prevFrame_, 4);
 
-        gpu::ensureSizeIsEnough(firstFrame.size(), CV_8UC1, Ftd_);
-        gpu::ensureSizeIsEnough(firstFrame.size(), CV_8UC1, Fbd_);
+        cuda::ensureSizeIsEnough(firstFrame.size(), CV_8UC1, Ftd_);
+        cuda::ensureSizeIsEnough(firstFrame.size(), CV_8UC1, Fbd_);
 
         stat_.create(firstFrame.size(), params_);
         fgd::setBGPixelStat(stat_);
@@ -716,14 +716,14 @@ namespace
             Mat kernel = getStructuringElement(MORPH_RECT, Size(1 + params_.perform_morphing * 2, 1 + params_.perform_morphing * 2));
             Point anchor(params_.perform_morphing, params_.perform_morphing);
 
-            dilateFilter_ = gpu::createMorphologyFilter(MORPH_DILATE, CV_8UC1, kernel, anchor);
-            erodeFilter_ = gpu::createMorphologyFilter(MORPH_ERODE, CV_8UC1, kernel, anchor);
+            dilateFilter_ = cuda::createMorphologyFilter(MORPH_DILATE, CV_8UC1, kernel, anchor);
+            erodeFilter_ = cuda::createMorphologyFilter(MORPH_ERODE, CV_8UC1, kernel, anchor);
         }
 #endif
     }
 }
 
-Ptr<gpu::BackgroundSubtractorFGD> cv::gpu::createBackgroundSubtractorFGD(const FGDParams& params)
+Ptr<cuda::BackgroundSubtractorFGD> cv::cuda::createBackgroundSubtractorFGD(const FGDParams& params)
 {
     return new FGDImpl(params);
 }
diff --git a/modules/gpubgsegm/src/gmg.cpp b/modules/gpubgsegm/src/gmg.cpp
index 2cc0f4d574..f7515814df 100644
--- a/modules/gpubgsegm/src/gmg.cpp
+++ b/modules/gpubgsegm/src/gmg.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-Ptr<gpu::BackgroundSubtractorGMG> cv::gpu::createBackgroundSubtractorGMG(int, double) { throw_no_cuda(); return Ptr<gpu::BackgroundSubtractorGMG>(); }
+Ptr<cuda::BackgroundSubtractorGMG> cv::cuda::createBackgroundSubtractorGMG(int, double) { throw_no_cuda(); return Ptr<cuda::BackgroundSubtractorGMG>(); }
 
 #else
 
-namespace cv { namespace gpu { namespace cudev {
+namespace cv { namespace cuda { namespace cudev {
     namespace gmg
     {
         void loadConstants(int width, int height, float minVal, float maxVal, int quantizationLevels, float backgroundPrior,
@@ -65,7 +65,7 @@ namespace cv { namespace gpu { namespace cudev {
 
 namespace
 {
-    class GMGImpl : public gpu::BackgroundSubtractorGMG
+    class GMGImpl : public cuda::BackgroundSubtractorGMG
     {
     public:
         GMGImpl(int initializationFrames, double decisionThreshold);
@@ -142,7 +142,7 @@ namespace
         GpuMat weights_;
 
 #if defined(HAVE_OPENCV_GPUFILTERS) && defined(HAVE_OPENCV_GPUARITHM)
-        Ptr<gpu::Filter> boxFilter_;
+        Ptr<cuda::Filter> boxFilter_;
         GpuMat buf_;
 #endif
     };
@@ -167,7 +167,7 @@ namespace
 
     void GMGImpl::apply(InputArray _frame, OutputArray _fgmask, double newLearningRate, Stream& stream)
     {
-        using namespace cv::gpu::cudev::gmg;
+        using namespace cv::cuda::cudev::gmg;
 
         typedef void (*func_t)(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures,
                                int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
@@ -221,7 +221,7 @@ namespace
             boxFilter_->apply(fgmask, buf_, stream);
             const int minCount = (smoothingRadius_ * smoothingRadius_ + 1) / 2;
             const double thresh = 255.0 * minCount / (smoothingRadius_ * smoothingRadius_);
-            gpu::threshold(buf_, fgmask, thresh, 255.0, THRESH_BINARY, stream);
+            cuda::threshold(buf_, fgmask, thresh, 255.0, THRESH_BINARY, stream);
         }
 #endif
 
@@ -237,7 +237,7 @@ namespace
 
     void GMGImpl::initialize(Size frameSize, float min, float max)
     {
-        using namespace cv::gpu::cudev::gmg;
+        using namespace cv::cuda::cudev::gmg;
 
         CV_Assert( maxFeatures_ > 0 );
         CV_Assert( learningRate_ >= 0.0f && learningRate_ <= 1.0f);
@@ -261,7 +261,7 @@ namespace
 
 #if defined(HAVE_OPENCV_GPUFILTERS) && defined(HAVE_OPENCV_GPUARITHM)
         if (smoothingRadius_ > 0)
-            boxFilter_ = gpu::createBoxFilter(CV_8UC1, -1, Size(smoothingRadius_, smoothingRadius_));
+            boxFilter_ = cuda::createBoxFilter(CV_8UC1, -1, Size(smoothingRadius_, smoothingRadius_));
 #endif
 
         loadConstants(frameSize_.width, frameSize_.height, minVal_, maxVal_,
@@ -269,7 +269,7 @@ namespace
     }
 }
 
-Ptr<gpu::BackgroundSubtractorGMG> cv::gpu::createBackgroundSubtractorGMG(int initializationFrames, double decisionThreshold)
+Ptr<cuda::BackgroundSubtractorGMG> cv::cuda::createBackgroundSubtractorGMG(int initializationFrames, double decisionThreshold)
 {
     return new GMGImpl(initializationFrames, decisionThreshold);
 }
diff --git a/modules/gpubgsegm/src/mog.cpp b/modules/gpubgsegm/src/mog.cpp
index 65adb9425c..fa9bc95e59 100644
--- a/modules/gpubgsegm/src/mog.cpp
+++ b/modules/gpubgsegm/src/mog.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-Ptr<gpu::BackgroundSubtractorMOG> cv::gpu::createBackgroundSubtractorMOG(int, int, double, double)  { throw_no_cuda(); return Ptr<gpu::BackgroundSubtractorMOG>(); }
+Ptr<cuda::BackgroundSubtractorMOG> cv::cuda::createBackgroundSubtractorMOG(int, int, double, double)  { throw_no_cuda(); return Ptr<cuda::BackgroundSubtractorMOG>(); }
 
 #else
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace mog
     {
@@ -71,7 +71,7 @@ namespace
     const float defaultNoiseSigma = 30.0f * 0.5f;
     const float defaultInitialWeight = 0.05f;
 
-    class MOGImpl : public gpu::BackgroundSubtractorMOG
+    class MOGImpl : public cuda::BackgroundSubtractorMOG
     {
     public:
         MOGImpl(int history, int nmixtures, double backgroundRatio, double noiseSigma);
@@ -133,7 +133,7 @@ namespace
 
     void MOGImpl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream& stream)
     {
-        using namespace cv::gpu::cudev::mog;
+        using namespace cv::cuda::cudev::mog;
 
         GpuMat frame = _frame.getGpuMat();
 
@@ -164,7 +164,7 @@ namespace
 
     void MOGImpl::getBackgroundImage(OutputArray _backgroundImage, Stream& stream) const
     {
-        using namespace cv::gpu::cudev::mog;
+        using namespace cv::cuda::cudev::mog;
 
         _backgroundImage.create(frameSize_, frameType_);
         GpuMat backgroundImage = _backgroundImage.getGpuMat();
@@ -201,7 +201,7 @@ namespace
     }
 }
 
-Ptr<gpu::BackgroundSubtractorMOG> cv::gpu::createBackgroundSubtractorMOG(int history, int nmixtures, double backgroundRatio, double noiseSigma)
+Ptr<cuda::BackgroundSubtractorMOG> cv::cuda::createBackgroundSubtractorMOG(int history, int nmixtures, double backgroundRatio, double noiseSigma)
 {
     return new MOGImpl(history, nmixtures, backgroundRatio, noiseSigma);
 }
diff --git a/modules/gpubgsegm/src/mog2.cpp b/modules/gpubgsegm/src/mog2.cpp
index 5ab731f7e3..e93e87e78b 100644
--- a/modules/gpubgsegm/src/mog2.cpp
+++ b/modules/gpubgsegm/src/mog2.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-Ptr<gpu::BackgroundSubtractorMOG2> cv::gpu::createBackgroundSubtractorMOG2(int, double, bool) { throw_no_cuda(); return Ptr<gpu::BackgroundSubtractorMOG2>(); }
+Ptr<cuda::BackgroundSubtractorMOG2> cv::cuda::createBackgroundSubtractorMOG2(int, double, bool) { throw_no_cuda(); return Ptr<cuda::BackgroundSubtractorMOG2>(); }
 
 #else
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace mog2
     {
@@ -78,7 +78,7 @@ namespace
     const unsigned char defaultShadowValue = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
     const float defaultShadowThreshold = 0.5f; // Tau - shadow threshold, see the paper for explanation
 
-    class MOG2Impl : public gpu::BackgroundSubtractorMOG2
+    class MOG2Impl : public cuda::BackgroundSubtractorMOG2
     {
     public:
         MOG2Impl(int history, double varThreshold, bool detectShadows);
@@ -178,7 +178,7 @@ namespace
 
     void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream& stream)
     {
-        using namespace cv::gpu::cudev::mog2;
+        using namespace cv::cuda::cudev::mog2;
 
         GpuMat frame = _frame.getGpuMat();
 
@@ -208,7 +208,7 @@ namespace
 
     void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream& stream) const
     {
-        using namespace cv::gpu::cudev::mog2;
+        using namespace cv::cuda::cudev::mog2;
 
         _backgroundImage.create(frameSize_, frameType_);
         GpuMat backgroundImage = _backgroundImage.getGpuMat();
@@ -218,7 +218,7 @@ namespace
 
     void MOG2Impl::initialize(cv::Size frameSize, int frameType)
     {
-        using namespace cv::gpu::cudev::mog2;
+        using namespace cv::cuda::cudev::mog2;
 
         CV_Assert( frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4 );
 
@@ -245,7 +245,7 @@ namespace
     }
 }
 
-Ptr<gpu::BackgroundSubtractorMOG2> cv::gpu::createBackgroundSubtractorMOG2(int history, double varThreshold, bool detectShadows)
+Ptr<cuda::BackgroundSubtractorMOG2> cv::cuda::createBackgroundSubtractorMOG2(int history, double varThreshold, bool detectShadows)
 {
     return new MOG2Impl(history, varThreshold, detectShadows);
 }
diff --git a/modules/gpubgsegm/test/test_bgsegm.cpp b/modules/gpubgsegm/test/test_bgsegm.cpp
index 3b5526b6c6..2778ecc1de 100644
--- a/modules/gpubgsegm/test/test_bgsegm.cpp
+++ b/modules/gpubgsegm/test/test_bgsegm.cpp
@@ -76,15 +76,15 @@ namespace cv
     }
 }
 
-PARAM_TEST_CASE(FGDStatModel, cv::gpu::DeviceInfo, std::string)
+PARAM_TEST_CASE(FGDStatModel, cv::cuda::DeviceInfo, std::string)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     std::string inputFile;
 
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
     }
@@ -102,9 +102,9 @@ GPU_TEST_P(FGDStatModel, Update)
     IplImage ipl_frame = frame;
     cv::Ptr<CvBGStatModel> model(cvCreateFGDStatModel(&ipl_frame));
 
-    cv::gpu::GpuMat d_frame(frame);
-    cv::Ptr<cv::gpu::BackgroundSubtractorFGD> d_fgd = cv::gpu::createBackgroundSubtractorFGD();
-    cv::gpu::GpuMat d_foreground, d_background;
+    cv::cuda::GpuMat d_frame(frame);
+    cv::Ptr<cv::cuda::BackgroundSubtractorFGD> d_fgd = cv::cuda::createBackgroundSubtractorFGD();
+    cv::cuda::GpuMat d_foreground, d_background;
     std::vector< std::vector<cv::Point> > foreground_regions;
     d_fgd->apply(d_frame, d_foreground);
 
@@ -148,9 +148,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(LearningRate, double)
 }
 
-PARAM_TEST_CASE(MOG, cv::gpu::DeviceInfo, std::string, UseGray, LearningRate, UseRoi)
+PARAM_TEST_CASE(MOG, cv::cuda::DeviceInfo, std::string, UseGray, LearningRate, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     std::string inputFile;
     bool useGray;
     double learningRate;
@@ -159,7 +159,7 @@ PARAM_TEST_CASE(MOG, cv::gpu::DeviceInfo, std::string, UseGray, LearningRate, Us
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
 
@@ -180,8 +180,8 @@ GPU_TEST_P(MOG, Update)
     cap >> frame;
     ASSERT_FALSE(frame.empty());
 
-    cv::Ptr<cv::BackgroundSubtractorMOG> mog = cv::gpu::createBackgroundSubtractorMOG();
-    cv::gpu::GpuMat foreground = createMat(frame.size(), CV_8UC1, useRoi);
+    cv::Ptr<cv::BackgroundSubtractorMOG> mog = cv::cuda::createBackgroundSubtractorMOG();
+    cv::cuda::GpuMat foreground = createMat(frame.size(), CV_8UC1, useRoi);
 
     cv::Ptr<cv::BackgroundSubtractorMOG> mog_gold = cv::createBackgroundSubtractorMOG();
     cv::Mat foreground_gold;
@@ -225,9 +225,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(DetectShadow, bool)
 }
 
-PARAM_TEST_CASE(MOG2, cv::gpu::DeviceInfo, std::string, UseGray, DetectShadow, UseRoi)
+PARAM_TEST_CASE(MOG2, cv::cuda::DeviceInfo, std::string, UseGray, DetectShadow, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     std::string inputFile;
     bool useGray;
     bool detectShadow;
@@ -236,7 +236,7 @@ PARAM_TEST_CASE(MOG2, cv::gpu::DeviceInfo, std::string, UseGray, DetectShadow, U
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
         useGray = GET_PARAM(2);
@@ -254,9 +254,9 @@ GPU_TEST_P(MOG2, Update)
     cap >> frame;
     ASSERT_FALSE(frame.empty());
 
-    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = cv::gpu::createBackgroundSubtractorMOG2();
+    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = cv::cuda::createBackgroundSubtractorMOG2();
     mog2->setDetectShadows(detectShadow);
-    cv::gpu::GpuMat foreground = createMat(frame.size(), CV_8UC1, useRoi);
+    cv::cuda::GpuMat foreground = createMat(frame.size(), CV_8UC1, useRoi);
 
     cv::Ptr<cv::BackgroundSubtractorMOG2> mog2_gold = cv::createBackgroundSubtractorMOG2();
     mog2_gold->setDetectShadows(detectShadow);
@@ -299,9 +299,9 @@ GPU_TEST_P(MOG2, getBackgroundImage)
 
     cv::Mat frame;
 
-    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = cv::gpu::createBackgroundSubtractorMOG2();
+    cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = cv::cuda::createBackgroundSubtractorMOG2();
     mog2->setDetectShadows(detectShadow);
-    cv::gpu::GpuMat foreground;
+    cv::cuda::GpuMat foreground;
 
     cv::Ptr<cv::BackgroundSubtractorMOG2> mog2_gold = cv::createBackgroundSubtractorMOG2();
     mog2_gold->setDetectShadows(detectShadow);
@@ -317,7 +317,7 @@ GPU_TEST_P(MOG2, getBackgroundImage)
         mog2_gold->apply(frame, foreground_gold);
     }
 
-    cv::gpu::GpuMat background = createMat(frame.size(), frame.type(), useRoi);
+    cv::cuda::GpuMat background = createMat(frame.size(), frame.type(), useRoi);
     mog2->getBackgroundImage(background);
 
     cv::Mat background_gold;
@@ -338,14 +338,14 @@ INSTANTIATE_TEST_CASE_P(GPU_BgSegm, MOG2, testing::Combine(
 //////////////////////////////////////////////////////
 // GMG
 
-PARAM_TEST_CASE(GMG, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
+PARAM_TEST_CASE(GMG, cv::cuda::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
 {
 };
 
 GPU_TEST_P(GMG, Accuracy)
 {
-    const cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::gpu::setDevice(devInfo.deviceID());
+    const cv::cuda::DeviceInfo devInfo = GET_PARAM(0);
+    cv::cuda::setDevice(devInfo.deviceID());
     const cv::Size size = GET_PARAM(1);
     const int depth = GET_PARAM(2);
     const int channels = GET_PARAM(3);
@@ -357,13 +357,13 @@ GPU_TEST_P(GMG, Accuracy)
     const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255));
 
     cv::Mat frame = randomMat(size, type, 0, 100);
-    cv::gpu::GpuMat d_frame = loadMat(frame, useRoi);
+    cv::cuda::GpuMat d_frame = loadMat(frame, useRoi);
 
-    cv::Ptr<cv::BackgroundSubtractorGMG> gmg = cv::gpu::createBackgroundSubtractorGMG();
+    cv::Ptr<cv::BackgroundSubtractorGMG> gmg = cv::cuda::createBackgroundSubtractorGMG();
     gmg->setNumFrames(5);
     gmg->setSmoothingRadius(0);
 
-    cv::gpu::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi);
+    cv::cuda::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi);
 
     for (int i = 0; i < gmg->getNumFrames(); ++i)
     {
diff --git a/modules/gpucodec/perf/perf_video.cpp b/modules/gpucodec/perf/perf_video.cpp
index c4ab227c84..6e60ed7dcc 100644
--- a/modules/gpucodec/perf/perf_video.cpp
+++ b/modules/gpucodec/perf/perf_video.cpp
@@ -77,7 +77,7 @@ PERF_TEST_P(FileName, VideoReader, Values("gpu/video/768x576.avi", "gpu/video/19
     {
         cv::Ptr<cv::gpucodec::VideoReader> d_reader = cv::gpucodec::createVideoReader(inputFile);
 
-        cv::gpu::GpuMat frame;
+        cv::cuda::GpuMat frame;
 
         TEST_CYCLE_N(10) d_reader->nextFrame(frame);
 
@@ -121,7 +121,7 @@ PERF_TEST_P(FileName, VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/19
     {
         cv::Ptr<cv::gpucodec::VideoWriter> d_writer;
 
-        cv::gpu::GpuMat d_frame;
+        cv::cuda::GpuMat d_frame;
 
         for (int i = 0; i < 10; ++i)
         {
diff --git a/modules/gpucodec/src/cuda/nv12_to_rgb.cu b/modules/gpucodec/src/cuda/nv12_to_rgb.cu
index 1de916e5a3..338ec23a6b 100644
--- a/modules/gpucodec/src/cuda/nv12_to_rgb.cu
+++ b/modules/gpucodec/src/cuda/nv12_to_rgb.cu
@@ -49,7 +49,7 @@
 
 #include "opencv2/core/cuda/common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     __constant__ float constHueColorSpaceMat[9] = {1.1644f, 0.0f, 1.596f, 1.1644f, -0.3918f, -0.813f, 1.1644f, 2.0172f, 0.0f};
 
diff --git a/modules/gpucodec/src/cuda/rgb_to_yv12.cu b/modules/gpucodec/src/cuda/rgb_to_yv12.cu
index 8787b1e685..20cfc48258 100644
--- a/modules/gpucodec/src/cuda/rgb_to_yv12.cu
+++ b/modules/gpucodec/src/cuda/rgb_to_yv12.cu
@@ -43,7 +43,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/vec_traits.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     __device__ __forceinline__ void rgb_to_y(const uchar b, const uchar g, const uchar r, uchar& y)
     {
diff --git a/modules/gpucodec/src/video_decoder.hpp b/modules/gpucodec/src/video_decoder.hpp
index 05a92f2664..468f6f1e10 100644
--- a/modules/gpucodec/src/video_decoder.hpp
+++ b/modules/gpucodec/src/video_decoder.hpp
@@ -85,17 +85,17 @@ public:
         return cuvidDecodePicture(decoder_, picParams) == CUDA_SUCCESS;
     }
 
-    gpu::GpuMat mapFrame(int picIdx, CUVIDPROCPARAMS& videoProcParams)
+    cuda::GpuMat mapFrame(int picIdx, CUVIDPROCPARAMS& videoProcParams)
     {
         CUdeviceptr ptr;
         unsigned int pitch;
 
         cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &ptr, &pitch, &videoProcParams) );
 
-        return gpu::GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch);
+        return cuda::GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch);
     }
 
-    void unmapFrame(gpu::GpuMat& frame)
+    void unmapFrame(cuda::GpuMat& frame)
     {
         cuSafeCall( cuvidUnmapVideoFrame(decoder_, (CUdeviceptr) frame.data) );
         frame.release();
diff --git a/modules/gpucodec/src/video_reader.cpp b/modules/gpucodec/src/video_reader.cpp
index 67e9cd1078..9606d89465 100644
--- a/modules/gpucodec/src/video_reader.cpp
+++ b/modules/gpucodec/src/video_reader.cpp
@@ -43,7 +43,7 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::gpucodec;
 
 #ifndef HAVE_NVCUVID
@@ -53,7 +53,7 @@ Ptr<VideoReader> cv::gpucodec::createVideoReader(const Ptr<RawVideoSource>&) { t
 
 #else // HAVE_NVCUVID
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     void NV12_to_RGB(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream = 0);
 }}}
@@ -125,7 +125,7 @@ namespace
 
     void cudaPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
 
         // Final Stage: NV12toARGB color space conversion
 
diff --git a/modules/gpucodec/src/video_writer.cpp b/modules/gpucodec/src/video_writer.cpp
index 6ffb7c12d7..de934b67e1 100644
--- a/modules/gpucodec/src/video_writer.cpp
+++ b/modules/gpucodec/src/video_writer.cpp
@@ -44,7 +44,7 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::gpucodec;
 
 #if !defined(HAVE_NVCUVID) || !defined(WIN32)
@@ -62,7 +62,7 @@ Ptr<VideoWriter> cv::gpucodec::createVideoWriter(const Ptr<EncoderCallBack>&, Si
 
 #else // !defined HAVE_CUDA || !defined WIN32
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     void RGB_to_YV12(const PtrStepSzb src, int cn, PtrStepSzb dst, cudaStream_t stream = 0);
 }}}
diff --git a/modules/gpucodec/test/test_video.cpp b/modules/gpucodec/test/test_video.cpp
index 26bcc02d58..1313be766f 100644
--- a/modules/gpucodec/test/test_video.cpp
+++ b/modules/gpucodec/test/test_video.cpp
@@ -44,7 +44,7 @@
 
 #ifdef HAVE_NVCUVID
 
-PARAM_TEST_CASE(Video, cv::gpu::DeviceInfo, std::string)
+PARAM_TEST_CASE(Video, cv::cuda::DeviceInfo, std::string)
 {
 };
 
@@ -53,13 +53,13 @@ PARAM_TEST_CASE(Video, cv::gpu::DeviceInfo, std::string)
 
 GPU_TEST_P(Video, Reader)
 {
-    cv::gpu::setDevice(GET_PARAM(0).deviceID());
+    cv::cuda::setDevice(GET_PARAM(0).deviceID());
 
     const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
 
     cv::Ptr<cv::gpucodec::VideoReader> reader = cv::gpucodec::createVideoReader(inputFile);
 
-    cv::gpu::GpuMat frame;
+    cv::cuda::GpuMat frame;
 
     for (int i = 0; i < 10; ++i)
     {
@@ -75,7 +75,7 @@ GPU_TEST_P(Video, Reader)
 
 GPU_TEST_P(Video, Writer)
 {
-    cv::gpu::setDevice(GET_PARAM(0).deviceID());
+    cv::cuda::setDevice(GET_PARAM(0).deviceID());
 
     const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
 
@@ -88,7 +88,7 @@ GPU_TEST_P(Video, Writer)
     cv::Ptr<cv::gpucodec::VideoWriter> d_writer;
 
     cv::Mat frame;
-    cv::gpu::GpuMat d_frame;
+    cv::cuda::GpuMat d_frame;
 
     for (int i = 0; i < 10; ++i)
     {
diff --git a/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp b/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp
index cc73da9d9e..fe20be63a3 100644
--- a/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp
+++ b/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp
@@ -50,7 +50,7 @@
 #include "opencv2/core/gpu.hpp"
 #include "opencv2/gpufilters.hpp"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 class CV_EXPORTS BFMatcher_GPU
 {
@@ -351,11 +351,11 @@ private:
 
     FAST_GPU fastDetector_;
 
-    Ptr<gpu::Filter> blurFilter;
+    Ptr<cuda::Filter> blurFilter;
 
     GpuMat d_keypoints_;
 };
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 #endif /* __OPENCV_GPUFEATURES2D_HPP__ */
diff --git a/modules/gpufeatures2d/perf/perf_features2d.cpp b/modules/gpufeatures2d/perf/perf_features2d.cpp
index fd28526335..2c2a6a0e38 100644
--- a/modules/gpufeatures2d/perf/perf_features2d.cpp
+++ b/modules/gpufeatures2d/perf/perf_features2d.cpp
@@ -64,12 +64,12 @@ PERF_TEST_P(Image_Threshold_NonMaxSupression, FAST,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::FAST_GPU d_fast(threshold, nonMaxSuppersion, 0.5);
+        cv::cuda::FAST_GPU d_fast(threshold, nonMaxSuppersion, 0.5);
 
-        const cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat d_keypoints;
+        const cv::cuda::GpuMat d_img(img);
+        cv::cuda::GpuMat d_keypoints;
 
-        TEST_CYCLE() d_fast(d_img, cv::gpu::GpuMat(), d_keypoints);
+        TEST_CYCLE() d_fast(d_img, cv::cuda::GpuMat(), d_keypoints);
 
         std::vector<cv::KeyPoint> gpu_keypoints;
         d_fast.downloadKeypoints(d_keypoints, gpu_keypoints);
@@ -106,12 +106,12 @@ PERF_TEST_P(Image_NFeatures, ORB,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::ORB_GPU d_orb(nFeatures);
+        cv::cuda::ORB_GPU d_orb(nFeatures);
 
-        const cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat d_keypoints, d_descriptors;
+        const cv::cuda::GpuMat d_img(img);
+        cv::cuda::GpuMat d_keypoints, d_descriptors;
 
-        TEST_CYCLE() d_orb(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
+        TEST_CYCLE() d_orb(d_img, cv::cuda::GpuMat(), d_keypoints, d_descriptors);
 
         std::vector<cv::KeyPoint> gpu_keypoints;
         d_orb.downloadKeyPoints(d_keypoints, gpu_keypoints);
@@ -164,11 +164,11 @@ PERF_TEST_P(DescSize_Norm, BFMatch,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::BFMatcher_GPU d_matcher(normType);
+        cv::cuda::BFMatcher_GPU d_matcher(normType);
 
-        const cv::gpu::GpuMat d_query(query);
-        const cv::gpu::GpuMat d_train(train);
-        cv::gpu::GpuMat d_trainIdx, d_distance;
+        const cv::cuda::GpuMat d_query(query);
+        const cv::cuda::GpuMat d_train(train);
+        cv::cuda::GpuMat d_trainIdx, d_distance;
 
         TEST_CYCLE() d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
 
@@ -223,11 +223,11 @@ PERF_TEST_P(DescSize_K_Norm, BFKnnMatch,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::BFMatcher_GPU d_matcher(normType);
+        cv::cuda::BFMatcher_GPU d_matcher(normType);
 
-        const cv::gpu::GpuMat d_query(query);
-        const cv::gpu::GpuMat d_train(train);
-        cv::gpu::GpuMat d_trainIdx, d_distance, d_allDist;
+        const cv::cuda::GpuMat d_query(query);
+        const cv::cuda::GpuMat d_train(train);
+        cv::cuda::GpuMat d_trainIdx, d_distance, d_allDist;
 
         TEST_CYCLE() d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
 
@@ -277,11 +277,11 @@ PERF_TEST_P(DescSize_Norm, BFRadiusMatch,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::BFMatcher_GPU d_matcher(normType);
+        cv::cuda::BFMatcher_GPU d_matcher(normType);
 
-        const cv::gpu::GpuMat d_query(query);
-        const cv::gpu::GpuMat d_train(train);
-        cv::gpu::GpuMat d_trainIdx, d_nMatches, d_distance;
+        const cv::cuda::GpuMat d_query(query);
+        const cv::cuda::GpuMat d_train(train);
+        cv::cuda::GpuMat d_trainIdx, d_nMatches, d_distance;
 
         TEST_CYCLE() d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, maxDistance);
 
diff --git a/modules/gpufeatures2d/src/brute_force_matcher.cpp b/modules/gpufeatures2d/src/brute_force_matcher.cpp
index feb0cc6928..0476e5d403 100644
--- a/modules/gpufeatures2d/src/brute_force_matcher.cpp
+++ b/modules/gpufeatures2d/src/brute_force_matcher.cpp
@@ -43,45 +43,45 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-cv::gpu::BFMatcher_GPU::BFMatcher_GPU(int) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::add(const std::vector<GpuMat>&) { throw_no_cuda(); }
-const std::vector<GpuMat>& cv::gpu::BFMatcher_GPU::getTrainDescriptors() const { throw_no_cuda(); return trainDescCollection; }
-void cv::gpu::BFMatcher_GPU::clear() { throw_no_cuda(); }
-bool cv::gpu::BFMatcher_GPU::empty() const { throw_no_cuda(); return true; }
-bool cv::gpu::BFMatcher_GPU::isMaskSupported() const { throw_no_cuda(); return true; }
-void cv::gpu::BFMatcher_GPU::matchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::matchDownload(const GpuMat&, const GpuMat&, std::vector<DMatch>&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::matchConvert(const Mat&, const Mat&, std::vector<DMatch>&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::match(const GpuMat&, const GpuMat&, std::vector<DMatch>&, const GpuMat&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::makeGpuCollection(GpuMat&, GpuMat&, const std::vector<GpuMat>&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::matchCollection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::matchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector<DMatch>&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::matchConvert(const Mat&, const Mat&, const Mat&, std::vector<DMatch>&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::match(const GpuMat&, std::vector<DMatch>&, const std::vector<GpuMat>&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::knnMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::knnMatchDownload(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::knnMatchConvert(const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::knnMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, int, const GpuMat&, bool) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::knnMatch2Collection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::knnMatch2Download(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::knnMatch2Convert(const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::knnMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, int, const std::vector<GpuMat>&, bool) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::radiusMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::radiusMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, float, const GpuMat&, bool) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::radiusMatchCollection(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const std::vector<GpuMat>&, Stream&) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat&, const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
-void cv::gpu::BFMatcher_GPU::radiusMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, float, const std::vector<GpuMat>&, bool) { throw_no_cuda(); }
+cv::cuda::BFMatcher_GPU::BFMatcher_GPU(int) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::add(const std::vector<GpuMat>&) { throw_no_cuda(); }
+const std::vector<GpuMat>& cv::cuda::BFMatcher_GPU::getTrainDescriptors() const { throw_no_cuda(); return trainDescCollection; }
+void cv::cuda::BFMatcher_GPU::clear() { throw_no_cuda(); }
+bool cv::cuda::BFMatcher_GPU::empty() const { throw_no_cuda(); return true; }
+bool cv::cuda::BFMatcher_GPU::isMaskSupported() const { throw_no_cuda(); return true; }
+void cv::cuda::BFMatcher_GPU::matchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::matchDownload(const GpuMat&, const GpuMat&, std::vector<DMatch>&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::matchConvert(const Mat&, const Mat&, std::vector<DMatch>&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::match(const GpuMat&, const GpuMat&, std::vector<DMatch>&, const GpuMat&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::makeGpuCollection(GpuMat&, GpuMat&, const std::vector<GpuMat>&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::matchCollection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::matchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector<DMatch>&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::matchConvert(const Mat&, const Mat&, const Mat&, std::vector<DMatch>&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::match(const GpuMat&, std::vector<DMatch>&, const std::vector<GpuMat>&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::knnMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, const GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::knnMatchDownload(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::knnMatchConvert(const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::knnMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, int, const GpuMat&, bool) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::knnMatch2Collection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::knnMatch2Download(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::knnMatch2Convert(const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::knnMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, int, const std::vector<GpuMat>&, bool) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::radiusMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::radiusMatchConvert(const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::radiusMatch(const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, float, const GpuMat&, bool) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::radiusMatchCollection(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const std::vector<GpuMat>&, Stream&) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::radiusMatchConvert(const Mat&, const Mat&, const Mat&, const Mat&, std::vector< std::vector<DMatch> >&, bool) { throw_no_cuda(); }
+void cv::cuda::BFMatcher_GPU::radiusMatch(const GpuMat&, std::vector< std::vector<DMatch> >&, float, const std::vector<GpuMat>&, bool) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace bf_match
     {
@@ -158,31 +158,31 @@ namespace cv { namespace gpu { namespace cudev
 ////////////////////////////////////////////////////////////////////
 // Train collection
 
-cv::gpu::BFMatcher_GPU::BFMatcher_GPU(int norm_) : norm(norm_)
+cv::cuda::BFMatcher_GPU::BFMatcher_GPU(int norm_) : norm(norm_)
 {
 }
 
-void cv::gpu::BFMatcher_GPU::add(const std::vector<GpuMat>& descCollection)
+void cv::cuda::BFMatcher_GPU::add(const std::vector<GpuMat>& descCollection)
 {
     trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end());
 }
 
-const std::vector<GpuMat>& cv::gpu::BFMatcher_GPU::getTrainDescriptors() const
+const std::vector<GpuMat>& cv::cuda::BFMatcher_GPU::getTrainDescriptors() const
 {
     return trainDescCollection;
 }
 
-void cv::gpu::BFMatcher_GPU::clear()
+void cv::cuda::BFMatcher_GPU::clear()
 {
     trainDescCollection.clear();
 }
 
-bool cv::gpu::BFMatcher_GPU::empty() const
+bool cv::cuda::BFMatcher_GPU::empty() const
 {
     return trainDescCollection.empty();
 }
 
-bool cv::gpu::BFMatcher_GPU::isMaskSupported() const
+bool cv::cuda::BFMatcher_GPU::isMaskSupported() const
 {
     return true;
 }
@@ -190,14 +190,14 @@ bool cv::gpu::BFMatcher_GPU::isMaskSupported() const
 ////////////////////////////////////////////////////////////////////
 // Match
 
-void cv::gpu::BFMatcher_GPU::matchSingle(const GpuMat& query, const GpuMat& train,
+void cv::cuda::BFMatcher_GPU::matchSingle(const GpuMat& query, const GpuMat& train,
     GpuMat& trainIdx, GpuMat& distance,
     const GpuMat& mask, Stream& stream)
 {
     if (query.empty() || train.empty())
         return;
 
-    using namespace cv::gpu::cudev::bf_match;
+    using namespace cv::cuda::cudev::bf_match;
 
     typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
                              const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
@@ -240,7 +240,7 @@ void cv::gpu::BFMatcher_GPU::matchSingle(const GpuMat& query, const GpuMat& trai
     func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::BFMatcher_GPU::matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches)
+void cv::cuda::BFMatcher_GPU::matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches)
 {
     if (trainIdx.empty() || distance.empty())
         return;
@@ -251,7 +251,7 @@ void cv::gpu::BFMatcher_GPU::matchDownload(const GpuMat& trainIdx, const GpuMat&
     matchConvert(trainIdxCPU, distanceCPU, matches);
 }
 
-void cv::gpu::BFMatcher_GPU::matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches)
+void cv::cuda::BFMatcher_GPU::matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches)
 {
     if (trainIdx.empty() || distance.empty())
         return;
@@ -281,7 +281,7 @@ void cv::gpu::BFMatcher_GPU::matchConvert(const Mat& trainIdx, const Mat& distan
     }
 }
 
-void cv::gpu::BFMatcher_GPU::match(const GpuMat& query, const GpuMat& train,
+void cv::cuda::BFMatcher_GPU::match(const GpuMat& query, const GpuMat& train,
     std::vector<DMatch>& matches, const GpuMat& mask)
 {
     GpuMat trainIdx, distance;
@@ -289,7 +289,7 @@ void cv::gpu::BFMatcher_GPU::match(const GpuMat& query, const GpuMat& train,
     matchDownload(trainIdx, distance, matches);
 }
 
-void cv::gpu::BFMatcher_GPU::makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection,
+void cv::cuda::BFMatcher_GPU::makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection,
     const std::vector<GpuMat>& masks)
 {
     if (empty())
@@ -333,14 +333,14 @@ void cv::gpu::BFMatcher_GPU::makeGpuCollection(GpuMat& trainCollection, GpuMat&
     }
 }
 
-void cv::gpu::BFMatcher_GPU::matchCollection(const GpuMat& query, const GpuMat& trainCollection,
+void cv::cuda::BFMatcher_GPU::matchCollection(const GpuMat& query, const GpuMat& trainCollection,
     GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
     const GpuMat& masks, Stream& stream)
 {
     if (query.empty() || trainCollection.empty())
         return;
 
-    using namespace cv::gpu::cudev::bf_match;
+    using namespace cv::cuda::cudev::bf_match;
 
     typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                              const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
@@ -382,7 +382,7 @@ void cv::gpu::BFMatcher_GPU::matchCollection(const GpuMat& query, const GpuMat&
     func(query, trainCollection, masks, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::BFMatcher_GPU::matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector<DMatch>& matches)
+void cv::cuda::BFMatcher_GPU::matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector<DMatch>& matches)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty())
         return;
@@ -394,7 +394,7 @@ void cv::gpu::BFMatcher_GPU::matchDownload(const GpuMat& trainIdx, const GpuMat&
     matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches);
 }
 
-void cv::gpu::BFMatcher_GPU::matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches)
+void cv::cuda::BFMatcher_GPU::matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty())
         return;
@@ -428,7 +428,7 @@ void cv::gpu::BFMatcher_GPU::matchConvert(const Mat& trainIdx, const Mat& imgIdx
     }
 }
 
-void cv::gpu::BFMatcher_GPU::match(const GpuMat& query, std::vector<DMatch>& matches, const std::vector<GpuMat>& masks)
+void cv::cuda::BFMatcher_GPU::match(const GpuMat& query, std::vector<DMatch>& matches, const std::vector<GpuMat>& masks)
 {
     GpuMat trainCollection;
     GpuMat maskCollection;
@@ -444,14 +444,14 @@ void cv::gpu::BFMatcher_GPU::match(const GpuMat& query, std::vector<DMatch>& mat
 ////////////////////////////////////////////////////////////////////
 // KnnMatch
 
-void cv::gpu::BFMatcher_GPU::knnMatchSingle(const GpuMat& query, const GpuMat& train,
+void cv::cuda::BFMatcher_GPU::knnMatchSingle(const GpuMat& query, const GpuMat& train,
     GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
     const GpuMat& mask, Stream& stream)
 {
     if (query.empty() || train.empty())
         return;
 
-    using namespace cv::gpu::cudev::bf_knnmatch;
+    using namespace cv::cuda::cudev::bf_knnmatch;
 
     typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
                              const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
@@ -505,7 +505,7 @@ void cv::gpu::BFMatcher_GPU::knnMatchSingle(const GpuMat& query, const GpuMat& t
     func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::BFMatcher_GPU::knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
+void cv::cuda::BFMatcher_GPU::knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
     std::vector< std::vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || distance.empty())
@@ -517,7 +517,7 @@ void cv::gpu::BFMatcher_GPU::knnMatchDownload(const GpuMat& trainIdx, const GpuM
     knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
 }
 
-void cv::gpu::BFMatcher_GPU::knnMatchConvert(const Mat& trainIdx, const Mat& distance,
+void cv::cuda::BFMatcher_GPU::knnMatchConvert(const Mat& trainIdx, const Mat& distance,
     std::vector< std::vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || distance.empty())
@@ -562,7 +562,7 @@ void cv::gpu::BFMatcher_GPU::knnMatchConvert(const Mat& trainIdx, const Mat& dis
     }
 }
 
-void cv::gpu::BFMatcher_GPU::knnMatch(const GpuMat& query, const GpuMat& train,
+void cv::cuda::BFMatcher_GPU::knnMatch(const GpuMat& query, const GpuMat& train,
     std::vector< std::vector<DMatch> >& matches, int k, const GpuMat& mask, bool compactResult)
 {
     GpuMat trainIdx, distance, allDist;
@@ -570,14 +570,14 @@ void cv::gpu::BFMatcher_GPU::knnMatch(const GpuMat& query, const GpuMat& train,
     knnMatchDownload(trainIdx, distance, matches, compactResult);
 }
 
-void cv::gpu::BFMatcher_GPU::knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
+void cv::cuda::BFMatcher_GPU::knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
     GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
     const GpuMat& maskCollection, Stream& stream)
 {
     if (query.empty() || trainCollection.empty())
         return;
 
-    using namespace cv::gpu::cudev::bf_knnmatch;
+    using namespace cv::cuda::cudev::bf_knnmatch;
 
     typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
                              const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
@@ -621,7 +621,7 @@ void cv::gpu::BFMatcher_GPU::knnMatch2Collection(const GpuMat& query, const GpuM
     func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::BFMatcher_GPU::knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
+void cv::cuda::BFMatcher_GPU::knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
     std::vector< std::vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty())
@@ -634,7 +634,7 @@ void cv::gpu::BFMatcher_GPU::knnMatch2Download(const GpuMat& trainIdx, const Gpu
     knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult);
 }
 
-void cv::gpu::BFMatcher_GPU::knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
+void cv::cuda::BFMatcher_GPU::knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
     std::vector< std::vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty())
@@ -690,7 +690,7 @@ namespace
     };
 }
 
-void cv::gpu::BFMatcher_GPU::knnMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, int k,
+void cv::cuda::BFMatcher_GPU::knnMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, int k,
     const std::vector<GpuMat>& masks, bool compactResult)
 {
     if (k == 2)
@@ -748,14 +748,14 @@ void cv::gpu::BFMatcher_GPU::knnMatch(const GpuMat& query, std::vector< std::vec
 ////////////////////////////////////////////////////////////////////
 // RadiusMatch
 
-void cv::gpu::BFMatcher_GPU::radiusMatchSingle(const GpuMat& query, const GpuMat& train,
+void cv::cuda::BFMatcher_GPU::radiusMatchSingle(const GpuMat& query, const GpuMat& train,
     GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
     const GpuMat& mask, Stream& stream)
 {
     if (query.empty() || train.empty())
         return;
 
-    using namespace cv::gpu::cudev::bf_radius_match;
+    using namespace cv::cuda::cudev::bf_radius_match;
 
     typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
                              const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
@@ -805,7 +805,7 @@ void cv::gpu::BFMatcher_GPU::radiusMatchSingle(const GpuMat& query, const GpuMat
     func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::BFMatcher_GPU::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
+void cv::cuda::BFMatcher_GPU::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
     std::vector< std::vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || distance.empty() || nMatches.empty())
@@ -818,7 +818,7 @@ void cv::gpu::BFMatcher_GPU::radiusMatchDownload(const GpuMat& trainIdx, const G
     radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
 }
 
-void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
+void cv::cuda::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
     std::vector< std::vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || distance.empty() || nMatches.empty())
@@ -867,7 +867,7 @@ void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat&
     }
 }
 
-void cv::gpu::BFMatcher_GPU::radiusMatch(const GpuMat& query, const GpuMat& train,
+void cv::cuda::BFMatcher_GPU::radiusMatch(const GpuMat& query, const GpuMat& train,
     std::vector< std::vector<DMatch> >& matches, float maxDistance, const GpuMat& mask, bool compactResult)
 {
     GpuMat trainIdx, distance, nMatches;
@@ -875,13 +875,13 @@ void cv::gpu::BFMatcher_GPU::radiusMatch(const GpuMat& query, const GpuMat& trai
     radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult);
 }
 
-void cv::gpu::BFMatcher_GPU::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches,
+void cv::cuda::BFMatcher_GPU::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches,
     float maxDistance, const std::vector<GpuMat>& masks, Stream& stream)
 {
     if (query.empty() || empty())
         return;
 
-    using namespace cv::gpu::cudev::bf_radius_match;
+    using namespace cv::cuda::cudev::bf_radius_match;
 
     typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
                              const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
@@ -934,7 +934,7 @@ void cv::gpu::BFMatcher_GPU::radiusMatchCollection(const GpuMat& query, GpuMat&
         trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::BFMatcher_GPU::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
+void cv::cuda::BFMatcher_GPU::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
     std::vector< std::vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
@@ -948,7 +948,7 @@ void cv::gpu::BFMatcher_GPU::radiusMatchDownload(const GpuMat& trainIdx, const G
     radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
 }
 
-void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
+void cv::cuda::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
     std::vector< std::vector<DMatch> >& matches, bool compactResult)
 {
     if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
@@ -1000,7 +1000,7 @@ void cv::gpu::BFMatcher_GPU::radiusMatchConvert(const Mat& trainIdx, const Mat&
     }
 }
 
-void cv::gpu::BFMatcher_GPU::radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches,
+void cv::cuda::BFMatcher_GPU::radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches,
     float maxDistance, const std::vector<GpuMat>& masks, bool compactResult)
 {
     GpuMat trainIdx, imgIdx, distance, nMatches;
diff --git a/modules/gpufeatures2d/src/cuda/bf_knnmatch.cu b/modules/gpufeatures2d/src/cuda/bf_knnmatch.cu
index d5d17bb8a0..5a1c49ff16 100644
--- a/modules/gpufeatures2d/src/cuda/bf_knnmatch.cu
+++ b/modules/gpufeatures2d/src/cuda/bf_knnmatch.cu
@@ -50,7 +50,7 @@
 #include "opencv2/core/cuda/datamov_utils.hpp"
 #include "opencv2/core/cuda/warp_shuffle.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace bf_knnmatch
     {
@@ -1249,7 +1249,7 @@ namespace cv { namespace gpu { namespace cudev
         //template void match2Hamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream);
         template void match2Hamming_gpu<int   >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, cudaStream_t stream);
     } // namespace bf_knnmatch
-}}} // namespace cv { namespace gpu { namespace cudev {
+}}} // namespace cv { namespace cuda { namespace cudev {
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpufeatures2d/src/cuda/bf_match.cu b/modules/gpufeatures2d/src/cuda/bf_match.cu
index 338fefcb69..a4a7fe0e42 100644
--- a/modules/gpufeatures2d/src/cuda/bf_match.cu
+++ b/modules/gpufeatures2d/src/cuda/bf_match.cu
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/vec_distance.hpp"
 #include "opencv2/core/cuda/datamov_utils.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace bf_match
     {
@@ -768,7 +768,7 @@ namespace cv { namespace gpu { namespace cudev
         //template void matchHamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream);
         template void matchHamming_gpu<int   >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, cudaStream_t stream);
     } // namespace bf_match
-}}} // namespace cv { namespace gpu { namespace cudev {
+}}} // namespace cv { namespace cuda { namespace cudev {
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpufeatures2d/src/cuda/bf_radius_match.cu b/modules/gpufeatures2d/src/cuda/bf_radius_match.cu
index 3c714d63f9..7e1f2e07e0 100644
--- a/modules/gpufeatures2d/src/cuda/bf_radius_match.cu
+++ b/modules/gpufeatures2d/src/cuda/bf_radius_match.cu
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/vec_distance.hpp"
 #include "opencv2/core/cuda/datamov_utils.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace bf_radius_match
     {
@@ -457,7 +457,7 @@ namespace cv { namespace gpu { namespace cudev
         //template void matchHamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream);
         template void matchHamming_gpu<int   >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream);
     } // namespace bf_radius_match
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpufeatures2d/src/cuda/fast.cu b/modules/gpufeatures2d/src/cuda/fast.cu
index 39d66d0eda..94ecf1fbda 100644
--- a/modules/gpufeatures2d/src/cuda/fast.cu
+++ b/modules/gpufeatures2d/src/cuda/fast.cu
@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/utility.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace fast
     {
diff --git a/modules/gpufeatures2d/src/cuda/orb.cu b/modules/gpufeatures2d/src/cuda/orb.cu
index 571ca12bde..8e68b34354 100644
--- a/modules/gpufeatures2d/src/cuda/orb.cu
+++ b/modules/gpufeatures2d/src/cuda/orb.cu
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/reduce.hpp"
 #include "opencv2/core/cuda/functional.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace orb
     {
diff --git a/modules/gpufeatures2d/src/fast.cpp b/modules/gpufeatures2d/src/fast.cpp
index 539423e75b..fb6a56fa53 100644
--- a/modules/gpufeatures2d/src/fast.cpp
+++ b/modules/gpufeatures2d/src/fast.cpp
@@ -43,27 +43,27 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-cv::gpu::FAST_GPU::FAST_GPU(int, bool, double) { throw_no_cuda(); }
-void cv::gpu::FAST_GPU::operator ()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::FAST_GPU::operator ()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::gpu::FAST_GPU::downloadKeypoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::gpu::FAST_GPU::convertKeypoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::gpu::FAST_GPU::release() { throw_no_cuda(); }
-int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat&, const GpuMat&) { throw_no_cuda(); return 0; }
-int cv::gpu::FAST_GPU::getKeyPoints(GpuMat&) { throw_no_cuda(); return 0; }
+cv::cuda::FAST_GPU::FAST_GPU(int, bool, double) { throw_no_cuda(); }
+void cv::cuda::FAST_GPU::operator ()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::FAST_GPU::operator ()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
+void cv::cuda::FAST_GPU::downloadKeypoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
+void cv::cuda::FAST_GPU::convertKeypoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
+void cv::cuda::FAST_GPU::release() { throw_no_cuda(); }
+int cv::cuda::FAST_GPU::calcKeyPointsLocation(const GpuMat&, const GpuMat&) { throw_no_cuda(); return 0; }
+int cv::cuda::FAST_GPU::getKeyPoints(GpuMat&) { throw_no_cuda(); return 0; }
 
 #else /* !defined (HAVE_CUDA) */
 
-cv::gpu::FAST_GPU::FAST_GPU(int _threshold, bool _nonmaxSupression, double _keypointsRatio) :
+cv::cuda::FAST_GPU::FAST_GPU(int _threshold, bool _nonmaxSupression, double _keypointsRatio) :
     nonmaxSupression(_nonmaxSupression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0)
 {
 }
 
-void cv::gpu::FAST_GPU::operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
+void cv::cuda::FAST_GPU::operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
 {
     if (image.empty())
         return;
@@ -72,7 +72,7 @@ void cv::gpu::FAST_GPU::operator ()(const GpuMat& image, const GpuMat& mask, std
     downloadKeypoints(d_keypoints_, keypoints);
 }
 
-void cv::gpu::FAST_GPU::downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints)
+void cv::cuda::FAST_GPU::downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints)
 {
     if (d_keypoints.empty())
         return;
@@ -81,7 +81,7 @@ void cv::gpu::FAST_GPU::downloadKeypoints(const GpuMat& d_keypoints, std::vector
     convertKeypoints(h_keypoints, keypoints);
 }
 
-void cv::gpu::FAST_GPU::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
+void cv::cuda::FAST_GPU::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
 {
     if (h_keypoints.empty())
         return;
@@ -102,13 +102,13 @@ void cv::gpu::FAST_GPU::convertKeypoints(const Mat& h_keypoints, std::vector<Key
     }
 }
 
-void cv::gpu::FAST_GPU::operator ()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
+void cv::cuda::FAST_GPU::operator ()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
 {
     calcKeyPointsLocation(img, mask);
     keypoints.cols = getKeyPoints(keypoints);
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace fast
     {
@@ -117,9 +117,9 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& mask)
+int cv::cuda::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& mask)
 {
-    using namespace cv::gpu::cudev::fast;
+    using namespace cv::cuda::cudev::fast;
 
     CV_Assert(img.type() == CV_8UC1);
     CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
@@ -140,9 +140,9 @@ int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& ma
     return count_;
 }
 
-int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
+int cv::cuda::FAST_GPU::getKeyPoints(GpuMat& keypoints)
 {
-    using namespace cv::gpu::cudev::fast;
+    using namespace cv::cuda::cudev::fast;
 
     if (count_ == 0)
         return 0;
@@ -159,7 +159,7 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
     return count_;
 }
 
-void cv::gpu::FAST_GPU::release()
+void cv::cuda::FAST_GPU::release()
 {
     kpLoc_.release();
     score_.release();
diff --git a/modules/gpufeatures2d/src/orb.cpp b/modules/gpufeatures2d/src/orb.cpp
index 7cb1cbecc1..e236e5c41f 100644
--- a/modules/gpufeatures2d/src/orb.cpp
+++ b/modules/gpufeatures2d/src/orb.cpp
@@ -43,26 +43,26 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-cv::gpu::ORB_GPU::ORB_GPU(int, float, int, int, int, int, int, int) : fastDetector_(20) { throw_no_cuda(); }
-void cv::gpu::ORB_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::gpu::ORB_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::ORB_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::ORB_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::ORB_GPU::downloadKeyPoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::gpu::ORB_GPU::convertKeyPoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::gpu::ORB_GPU::release() { throw_no_cuda(); }
-void cv::gpu::ORB_GPU::buildScalePyramids(const GpuMat&, const GpuMat&) { throw_no_cuda(); }
-void cv::gpu::ORB_GPU::computeKeyPointsPyramid() { throw_no_cuda(); }
-void cv::gpu::ORB_GPU::computeDescriptors(GpuMat&) { throw_no_cuda(); }
-void cv::gpu::ORB_GPU::mergeKeyPoints(GpuMat&) { throw_no_cuda(); }
+cv::cuda::ORB_GPU::ORB_GPU(int, float, int, int, int, int, int, int) : fastDetector_(20) { throw_no_cuda(); }
+void cv::cuda::ORB_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
+void cv::cuda::ORB_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::ORB_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::ORB_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::ORB_GPU::downloadKeyPoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
+void cv::cuda::ORB_GPU::convertKeyPoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
+void cv::cuda::ORB_GPU::release() { throw_no_cuda(); }
+void cv::cuda::ORB_GPU::buildScalePyramids(const GpuMat&, const GpuMat&) { throw_no_cuda(); }
+void cv::cuda::ORB_GPU::computeKeyPointsPyramid() { throw_no_cuda(); }
+void cv::cuda::ORB_GPU::computeDescriptors(GpuMat&) { throw_no_cuda(); }
+void cv::cuda::ORB_GPU::mergeKeyPoints(GpuMat&) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace orb
     {
@@ -395,7 +395,7 @@ namespace
     }
 }
 
-cv::gpu::ORB_GPU::ORB_GPU(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) :
+cv::cuda::ORB_GPU::ORB_GPU(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) :
     nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold), firstLevel_(firstLevel), WTA_K_(WTA_K),
     scoreType_(scoreType), patchSize_(patchSize),
     fastDetector_(DEFAULT_FAST_THRESHOLD)
@@ -431,7 +431,7 @@ cv::gpu::ORB_GPU::ORB_GPU(int nFeatures, float scaleFactor, int nLevels, int edg
         ++v_0;
     }
     CV_Assert(u_max.size() < 32);
-    cv::gpu::cudev::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
+    cv::cuda::cudev::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
 
     // Calc pattern
     const int npoints = 512;
@@ -468,7 +468,7 @@ cv::gpu::ORB_GPU::ORB_GPU(int nFeatures, float scaleFactor, int nLevels, int edg
 
     pattern_.upload(h_pattern);
 
-    blurFilter = gpu::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);
+    blurFilter = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);
 
     blurForDescriptor = false;
 }
@@ -481,7 +481,7 @@ namespace
     }
 }
 
-void cv::gpu::ORB_GPU::buildScalePyramids(const GpuMat& image, const GpuMat& mask)
+void cv::cuda::ORB_GPU::buildScalePyramids(const GpuMat& image, const GpuMat& mask)
 {
     CV_Assert(image.type() == CV_8UC1);
     CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
@@ -504,19 +504,19 @@ void cv::gpu::ORB_GPU::buildScalePyramids(const GpuMat& image, const GpuMat& mas
         {
             if (level < firstLevel_)
             {
-                gpu::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);
+                cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);
 
                 if (!mask.empty())
-                    gpu::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
+                    cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
             }
             else
             {
-                gpu::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);
+                cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);
 
                 if (!mask.empty())
                 {
-                    gpu::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
-                    gpu::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
+                    cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
+                    cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
                 }
             }
         }
@@ -534,7 +534,7 @@ void cv::gpu::ORB_GPU::buildScalePyramids(const GpuMat& image, const GpuMat& mas
         Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_);
         buf_(inner).setTo(Scalar::all(255));
 
-        gpu::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
+        cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
     }
 }
 
@@ -543,7 +543,7 @@ namespace
     //takes keypoints and culls them by the response
     void cull(GpuMat& keypoints, int& count, int n_points)
     {
-        using namespace cv::gpu::cudev::orb;
+        using namespace cv::cuda::cudev::orb;
 
         //this is only necessary if the keypoints size is greater than the number of desired points.
         if (count > n_points)
@@ -559,9 +559,9 @@ namespace
     }
 }
 
-void cv::gpu::ORB_GPU::computeKeyPointsPyramid()
+void cv::cuda::ORB_GPU::computeKeyPointsPyramid()
 {
-    using namespace cv::gpu::cudev::orb;
+    using namespace cv::cuda::cudev::orb;
 
     int half_patch_size = patchSize_ / 2;
 
@@ -602,9 +602,9 @@ void cv::gpu::ORB_GPU::computeKeyPointsPyramid()
     }
 }
 
-void cv::gpu::ORB_GPU::computeDescriptors(GpuMat& descriptors)
+void cv::cuda::ORB_GPU::computeDescriptors(GpuMat& descriptors)
 {
-    using namespace cv::gpu::cudev::orb;
+    using namespace cv::cuda::cudev::orb;
 
     int nAllkeypoints = 0;
 
@@ -642,9 +642,9 @@ void cv::gpu::ORB_GPU::computeDescriptors(GpuMat& descriptors)
     }
 }
 
-void cv::gpu::ORB_GPU::mergeKeyPoints(GpuMat& keypoints)
+void cv::cuda::ORB_GPU::mergeKeyPoints(GpuMat& keypoints)
 {
-    using namespace cv::gpu::cudev::orb;
+    using namespace cv::cuda::cudev::orb;
 
     int nAllkeypoints = 0;
 
@@ -684,7 +684,7 @@ void cv::gpu::ORB_GPU::mergeKeyPoints(GpuMat& keypoints)
     }
 }
 
-void cv::gpu::ORB_GPU::downloadKeyPoints(const GpuMat &d_keypoints, std::vector<KeyPoint>& keypoints)
+void cv::cuda::ORB_GPU::downloadKeyPoints(const GpuMat &d_keypoints, std::vector<KeyPoint>& keypoints)
 {
     if (d_keypoints.empty())
     {
@@ -697,7 +697,7 @@ void cv::gpu::ORB_GPU::downloadKeyPoints(const GpuMat &d_keypoints, std::vector<
     convertKeyPoints(h_keypoints, keypoints);
 }
 
-void cv::gpu::ORB_GPU::convertKeyPoints(const Mat &d_keypoints, std::vector<KeyPoint>& keypoints)
+void cv::cuda::ORB_GPU::convertKeyPoints(const Mat &d_keypoints, std::vector<KeyPoint>& keypoints)
 {
     if (d_keypoints.empty())
     {
@@ -731,14 +731,14 @@ void cv::gpu::ORB_GPU::convertKeyPoints(const Mat &d_keypoints, std::vector<KeyP
     }
 }
 
-void cv::gpu::ORB_GPU::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints)
+void cv::cuda::ORB_GPU::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints)
 {
     buildScalePyramids(image, mask);
     computeKeyPointsPyramid();
     mergeKeyPoints(keypoints);
 }
 
-void cv::gpu::ORB_GPU::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors)
+void cv::cuda::ORB_GPU::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors)
 {
     buildScalePyramids(image, mask);
     computeKeyPointsPyramid();
@@ -746,19 +746,19 @@ void cv::gpu::ORB_GPU::operator()(const GpuMat& image, const GpuMat& mask, GpuMa
     mergeKeyPoints(keypoints);
 }
 
-void cv::gpu::ORB_GPU::operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
+void cv::cuda::ORB_GPU::operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
 {
     (*this)(image, mask, d_keypoints_);
     downloadKeyPoints(d_keypoints_, keypoints);
 }
 
-void cv::gpu::ORB_GPU::operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors)
+void cv::cuda::ORB_GPU::operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors)
 {
     (*this)(image, mask, d_keypoints_, descriptors);
     downloadKeyPoints(d_keypoints_, keypoints);
 }
 
-void cv::gpu::ORB_GPU::release()
+void cv::cuda::ORB_GPU::release()
 {
     imagePyr_.clear();
     maskPyr_.clear();
diff --git a/modules/gpufeatures2d/test/test_features2d.cpp b/modules/gpufeatures2d/test/test_features2d.cpp
index 29dcc20407..a4995c00a4 100644
--- a/modules/gpufeatures2d/test/test_features2d.cpp
+++ b/modules/gpufeatures2d/test/test_features2d.cpp
@@ -55,9 +55,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(FAST_NonmaxSupression, bool)
 }
 
-PARAM_TEST_CASE(FAST, cv::gpu::DeviceInfo, FAST_Threshold, FAST_NonmaxSupression)
+PARAM_TEST_CASE(FAST, cv::cuda::DeviceInfo, FAST_Threshold, FAST_NonmaxSupression)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     int threshold;
     bool nonmaxSupression;
 
@@ -67,7 +67,7 @@ PARAM_TEST_CASE(FAST, cv::gpu::DeviceInfo, FAST_Threshold, FAST_NonmaxSupression
         threshold = GET_PARAM(1);
         nonmaxSupression = GET_PARAM(2);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -76,15 +76,15 @@ GPU_TEST_P(FAST, Accuracy)
     cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(image.empty());
 
-    cv::gpu::FAST_GPU fast(threshold);
+    cv::cuda::FAST_GPU fast(threshold);
     fast.nonmaxSupression = nonmaxSupression;
 
-    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+    if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
     {
         try
         {
             std::vector<cv::KeyPoint> keypoints;
-            fast(loadMat(image), cv::gpu::GpuMat(), keypoints);
+            fast(loadMat(image), cv::cuda::GpuMat(), keypoints);
         }
         catch (const cv::Exception& e)
         {
@@ -94,7 +94,7 @@ GPU_TEST_P(FAST, Accuracy)
     else
     {
         std::vector<cv::KeyPoint> keypoints;
-        fast(loadMat(image), cv::gpu::GpuMat(), keypoints);
+        fast(loadMat(image), cv::cuda::GpuMat(), keypoints);
 
         std::vector<cv::KeyPoint> keypoints_gold;
         cv::FAST(image, keypoints_gold, threshold, nonmaxSupression);
@@ -125,9 +125,9 @@ namespace
 
 CV_ENUM(ORB_ScoreType, ORB::HARRIS_SCORE, ORB::FAST_SCORE)
 
-PARAM_TEST_CASE(ORB, cv::gpu::DeviceInfo, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold, ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor)
+PARAM_TEST_CASE(ORB, cv::cuda::DeviceInfo, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold, ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     int nFeatures;
     float scaleFactor;
     int nLevels;
@@ -151,7 +151,7 @@ PARAM_TEST_CASE(ORB, cv::gpu::DeviceInfo, ORB_FeaturesCount, ORB_ScaleFactor, OR
         patchSize = GET_PARAM(8);
         blurForDescriptor = GET_PARAM(9);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -163,15 +163,15 @@ GPU_TEST_P(ORB, Accuracy)
     cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
     mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
 
-    cv::gpu::ORB_GPU orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
+    cv::cuda::ORB_GPU orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
     orb.blurForDescriptor = blurForDescriptor;
 
-    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+    if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
     {
         try
         {
             std::vector<cv::KeyPoint> keypoints;
-            cv::gpu::GpuMat descriptors;
+            cv::cuda::GpuMat descriptors;
             orb(loadMat(image), loadMat(mask), keypoints, descriptors);
         }
         catch (const cv::Exception& e)
@@ -182,7 +182,7 @@ GPU_TEST_P(ORB, Accuracy)
     else
     {
         std::vector<cv::KeyPoint> keypoints;
-        cv::gpu::GpuMat descriptors;
+        cv::cuda::GpuMat descriptors;
         orb(loadMat(image), loadMat(mask), keypoints, descriptors);
 
         cv::ORB orb_gold(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
@@ -223,9 +223,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(UseMask, bool)
 }
 
-PARAM_TEST_CASE(BruteForceMatcher, cv::gpu::DeviceInfo, NormCode, DescriptorSize, UseMask)
+PARAM_TEST_CASE(BruteForceMatcher, cv::cuda::DeviceInfo, NormCode, DescriptorSize, UseMask)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     int normCode;
     int dim;
     bool useMask;
@@ -242,7 +242,7 @@ PARAM_TEST_CASE(BruteForceMatcher, cv::gpu::DeviceInfo, NormCode, DescriptorSize
         dim = GET_PARAM(2);
         useMask = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         queryDescCount = 300; // must be even number because we split train data in some cases in two
         countFactor = 4; // do not change it
@@ -285,9 +285,9 @@ PARAM_TEST_CASE(BruteForceMatcher, cv::gpu::DeviceInfo, NormCode, DescriptorSize
 
 GPU_TEST_P(BruteForceMatcher, Match_Single)
 {
-    cv::gpu::BFMatcher_GPU matcher(normCode);
+    cv::cuda::BFMatcher_GPU matcher(normCode);
 
-    cv::gpu::GpuMat mask;
+    cv::cuda::GpuMat mask;
     if (useMask)
     {
         mask.create(query.rows, train.rows, CV_8UC1);
@@ -312,28 +312,28 @@ GPU_TEST_P(BruteForceMatcher, Match_Single)
 
 GPU_TEST_P(BruteForceMatcher, Match_Collection)
 {
-    cv::gpu::BFMatcher_GPU matcher(normCode);
+    cv::cuda::BFMatcher_GPU matcher(normCode);
 
-    cv::gpu::GpuMat d_train(train);
+    cv::cuda::GpuMat d_train(train);
 
     // make add() twice to test such case
-    matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
 
     // prepare masks (make first nearest match illegal)
-    std::vector<cv::gpu::GpuMat> masks(2);
+    std::vector<cv::cuda::GpuMat> masks(2);
     for (int mi = 0; mi < 2; mi++)
     {
-        masks[mi] = cv::gpu::GpuMat(query.rows, train.rows/2, CV_8UC1, cv::Scalar::all(1));
+        masks[mi] = cv::cuda::GpuMat(query.rows, train.rows/2, CV_8UC1, cv::Scalar::all(1));
         for (int di = 0; di < queryDescCount/2; di++)
             masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
     }
 
     std::vector<cv::DMatch> matches;
     if (useMask)
-        matcher.match(cv::gpu::GpuMat(query), matches, masks);
+        matcher.match(cv::cuda::GpuMat(query), matches, masks);
     else
-        matcher.match(cv::gpu::GpuMat(query), matches);
+        matcher.match(cv::cuda::GpuMat(query), matches);
 
     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
 
@@ -366,11 +366,11 @@ GPU_TEST_P(BruteForceMatcher, Match_Collection)
 
 GPU_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
 {
-    cv::gpu::BFMatcher_GPU matcher(normCode);
+    cv::cuda::BFMatcher_GPU matcher(normCode);
 
     const int knn = 2;
 
-    cv::gpu::GpuMat mask;
+    cv::cuda::GpuMat mask;
     if (useMask)
     {
         mask.create(query.rows, train.rows, CV_8UC1);
@@ -405,11 +405,11 @@ GPU_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
 
 GPU_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
 {
-    cv::gpu::BFMatcher_GPU matcher(normCode);
+    cv::cuda::BFMatcher_GPU matcher(normCode);
 
     const int knn = 3;
 
-    cv::gpu::GpuMat mask;
+    cv::cuda::GpuMat mask;
     if (useMask)
     {
         mask.create(query.rows, train.rows, CV_8UC1);
@@ -444,21 +444,21 @@ GPU_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
 
 GPU_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
 {
-    cv::gpu::BFMatcher_GPU matcher(normCode);
+    cv::cuda::BFMatcher_GPU matcher(normCode);
 
     const int knn = 2;
 
-    cv::gpu::GpuMat d_train(train);
+    cv::cuda::GpuMat d_train(train);
 
     // make add() twice to test such case
-    matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
 
     // prepare masks (make first nearest match illegal)
-    std::vector<cv::gpu::GpuMat> masks(2);
+    std::vector<cv::cuda::GpuMat> masks(2);
     for (int mi = 0; mi < 2; mi++ )
     {
-        masks[mi] = cv::gpu::GpuMat(query.rows, train.rows / 2, CV_8UC1, cv::Scalar::all(1));
+        masks[mi] = cv::cuda::GpuMat(query.rows, train.rows / 2, CV_8UC1, cv::Scalar::all(1));
         for (int di = 0; di < queryDescCount / 2; di++)
             masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
     }
@@ -466,9 +466,9 @@ GPU_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
     std::vector< std::vector<cv::DMatch> > matches;
 
     if (useMask)
-        matcher.knnMatch(cv::gpu::GpuMat(query), matches, knn, masks);
+        matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
     else
-        matcher.knnMatch(cv::gpu::GpuMat(query), matches, knn);
+        matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn);
 
     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
 
@@ -506,21 +506,21 @@ GPU_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
 
 GPU_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
 {
-    cv::gpu::BFMatcher_GPU matcher(normCode);
+    cv::cuda::BFMatcher_GPU matcher(normCode);
 
     const int knn = 3;
 
-    cv::gpu::GpuMat d_train(train);
+    cv::cuda::GpuMat d_train(train);
 
     // make add() twice to test such case
-    matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
 
     // prepare masks (make first nearest match illegal)
-    std::vector<cv::gpu::GpuMat> masks(2);
+    std::vector<cv::cuda::GpuMat> masks(2);
     for (int mi = 0; mi < 2; mi++ )
     {
-        masks[mi] = cv::gpu::GpuMat(query.rows, train.rows / 2, CV_8UC1, cv::Scalar::all(1));
+        masks[mi] = cv::cuda::GpuMat(query.rows, train.rows / 2, CV_8UC1, cv::Scalar::all(1));
         for (int di = 0; di < queryDescCount / 2; di++)
             masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
     }
@@ -528,9 +528,9 @@ GPU_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
     std::vector< std::vector<cv::DMatch> > matches;
 
     if (useMask)
-        matcher.knnMatch(cv::gpu::GpuMat(query), matches, knn, masks);
+        matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
     else
-        matcher.knnMatch(cv::gpu::GpuMat(query), matches, knn);
+        matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn);
 
     ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
 
@@ -568,11 +568,11 @@ GPU_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
 
 GPU_TEST_P(BruteForceMatcher, RadiusMatch_Single)
 {
-    cv::gpu::BFMatcher_GPU matcher(normCode);
+    cv::cuda::BFMatcher_GPU matcher(normCode);
 
     const float radius = 1.f / countFactor;
 
-    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+    if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
     {
         try
         {
@@ -586,7 +586,7 @@ GPU_TEST_P(BruteForceMatcher, RadiusMatch_Single)
     }
     else
     {
-        cv::gpu::GpuMat mask;
+        cv::cuda::GpuMat mask;
         if (useMask)
         {
             mask.create(query.rows, train.rows, CV_8UC1);
@@ -617,32 +617,32 @@ GPU_TEST_P(BruteForceMatcher, RadiusMatch_Single)
 
 GPU_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
 {
-    cv::gpu::BFMatcher_GPU matcher(normCode);
+    cv::cuda::BFMatcher_GPU matcher(normCode);
 
     const int n = 3;
     const float radius = 1.f / countFactor * n;
 
-    cv::gpu::GpuMat d_train(train);
+    cv::cuda::GpuMat d_train(train);
 
     // make add() twice to test such case
-    matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
-    matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
+    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
+    matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
 
     // prepare masks (make first nearest match illegal)
-    std::vector<cv::gpu::GpuMat> masks(2);
+    std::vector<cv::cuda::GpuMat> masks(2);
     for (int mi = 0; mi < 2; mi++)
     {
-        masks[mi] = cv::gpu::GpuMat(query.rows, train.rows / 2, CV_8UC1, cv::Scalar::all(1));
+        masks[mi] = cv::cuda::GpuMat(query.rows, train.rows / 2, CV_8UC1, cv::Scalar::all(1));
         for (int di = 0; di < queryDescCount / 2; di++)
             masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
     }
 
-    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+    if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
     {
         try
         {
             std::vector< std::vector<cv::DMatch> > matches;
-            matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks);
+            matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
         }
         catch (const cv::Exception& e)
         {
@@ -654,9 +654,9 @@ GPU_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
         std::vector< std::vector<cv::DMatch> > matches;
 
         if (useMask)
-            matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks);
+            matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
         else
-            matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius);
+            matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius);
 
         ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
 
diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp
index b0ebfd73c5..6d1b89d0ec 100644
--- a/modules/gpufilters/include/opencv2/gpufilters.hpp
+++ b/modules/gpufilters/include/opencv2/gpufilters.hpp
@@ -50,7 +50,7 @@
 #include "opencv2/core/gpu.hpp"
 #include "opencv2/imgproc.hpp"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 class CV_EXPORTS Filter : public Algorithm
 {
@@ -144,6 +144,6 @@ CV_EXPORTS Ptr<Filter> createRowSumFilter(int srcType, int dstType, int ksize, i
 //! supports only CV_8UC1 sum type and CV_32FC1 dst type
 CV_EXPORTS Ptr<Filter> createColumnSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0));
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 #endif /* __OPENCV_GPUFILTERS_HPP__ */
diff --git a/modules/gpufilters/perf/perf_filters.cpp b/modules/gpufilters/perf/perf_filters.cpp
index 6ad0998a5b..4a2ae2a617 100644
--- a/modules/gpufilters/perf/perf_filters.cpp
+++ b/modules/gpufilters/perf/perf_filters.cpp
@@ -67,10 +67,10 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::Filter> blurFilter = cv::gpu::createBoxFilter(d_src.type(), -1, cv::Size(ksize, ksize));
+        cv::Ptr<cv::cuda::Filter> blurFilter = cv::cuda::createBoxFilter(d_src.type(), -1, cv::Size(ksize, ksize));
 
         TEST_CYCLE() blurFilter->apply(d_src, dst);
 
@@ -105,10 +105,10 @@ PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::Filter> filter2D = cv::gpu::createLinearFilter(d_src.type(), -1, kernel);
+        cv::Ptr<cv::cuda::Filter> filter2D = cv::cuda::createLinearFilter(d_src.type(), -1, kernel);
 
         TEST_CYCLE() filter2D->apply(d_src, dst);
 
@@ -140,10 +140,10 @@ PERF_TEST_P(Sz_Type_KernelSz, Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(C
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::Filter> laplacian = cv::gpu::createLaplacianFilter(d_src.type(), -1, ksize);
+        cv::Ptr<cv::cuda::Filter> laplacian = cv::cuda::createLaplacianFilter(d_src.type(), -1, ksize);
 
         TEST_CYCLE() laplacian->apply(d_src, dst);
 
@@ -175,10 +175,10 @@ PERF_TEST_P(Sz_Type_KernelSz, Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::Filter> sobel = cv::gpu::createSobelFilter(d_src.type(), -1, 1, 1, ksize);
+        cv::Ptr<cv::cuda::Filter> sobel = cv::cuda::createSobelFilter(d_src.type(), -1, 1, 1, ksize);
 
         TEST_CYCLE() sobel->apply(d_src, dst);
 
@@ -209,10 +209,10 @@ PERF_TEST_P(Sz_Type, Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::Filter> scharr = cv::gpu::createScharrFilter(d_src.type(), -1, 1, 0);
+        cv::Ptr<cv::cuda::Filter> scharr = cv::cuda::createScharrFilter(d_src.type(), -1, 1, 0);
 
         TEST_CYCLE() scharr->apply(d_src, dst);
 
@@ -244,10 +244,10 @@ PERF_TEST_P(Sz_Type_KernelSz, GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Value
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::Filter> gauss = cv::gpu::createGaussianFilter(d_src.type(), -1, cv::Size(ksize, ksize), 0.5);
+        cv::Ptr<cv::cuda::Filter> gauss = cv::cuda::createGaussianFilter(d_src.type(), -1, cv::Size(ksize, ksize), 0.5);
 
         TEST_CYCLE() gauss->apply(d_src, dst);
 
@@ -280,10 +280,10 @@ PERF_TEST_P(Sz_Type, Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8U
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::Filter> erode = cv::gpu::createMorphologyFilter(cv::MORPH_ERODE, src.type(), ker);
+        cv::Ptr<cv::cuda::Filter> erode = cv::cuda::createMorphologyFilter(cv::MORPH_ERODE, src.type(), ker);
 
         TEST_CYCLE() erode->apply(d_src, dst);
 
@@ -316,10 +316,10 @@ PERF_TEST_P(Sz_Type, Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::Filter> dilate = cv::gpu::createMorphologyFilter(cv::MORPH_DILATE, src.type(), ker);
+        cv::Ptr<cv::cuda::Filter> dilate = cv::cuda::createMorphologyFilter(cv::MORPH_DILATE, src.type(), ker);
 
         TEST_CYCLE() dilate->apply(d_src, dst);
 
@@ -357,10 +357,10 @@ PERF_TEST_P(Sz_Type_Op, MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::Filter> morph = cv::gpu::createMorphologyFilter(morphOp, src.type(), ker);
+        cv::Ptr<cv::cuda::Filter> morph = cv::cuda::createMorphologyFilter(morphOp, src.type(), ker);
 
         TEST_CYCLE() morph->apply(d_src, dst);
 
diff --git a/modules/gpufilters/src/cuda/column_filter.hpp b/modules/gpufilters/src/cuda/column_filter.hpp
index 6f10c36f5f..39abfb0e72 100644
--- a/modules/gpufilters/src/cuda/column_filter.hpp
+++ b/modules/gpufilters/src/cuda/column_filter.hpp
@@ -45,8 +45,8 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace column_filter
 {
diff --git a/modules/gpufilters/src/cuda/filter2d.cu b/modules/gpufilters/src/cuda/filter2d.cu
index 4e913124df..fbffe8c578 100644
--- a/modules/gpufilters/src/cuda/filter2d.cu
+++ b/modules/gpufilters/src/cuda/filter2d.cu
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <class SrcPtr, typename D>
     __global__ void filter2D(const SrcPtr src, PtrStepSz<D> dst,
diff --git a/modules/gpufilters/src/cuda/row_filter.hpp b/modules/gpufilters/src/cuda/row_filter.hpp
index 3199a02e6a..1f62a5606e 100644
--- a/modules/gpufilters/src/cuda/row_filter.hpp
+++ b/modules/gpufilters/src/cuda/row_filter.hpp
@@ -45,8 +45,8 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace row_filter
 {
diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp
index 14917acc33..94b69c4c14 100644
--- a/modules/gpufilters/src/filtering.cpp
+++ b/modules/gpufilters/src/filtering.cpp
@@ -43,31 +43,31 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-Ptr<Filter> cv::gpu::createBoxFilter(int, int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createBoxFilter(int, int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
 
-Ptr<Filter> cv::gpu::createLinearFilter(int, int, InputArray, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createLinearFilter(int, int, InputArray, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
 
-Ptr<Filter> cv::gpu::createLaplacianFilter(int, int, int, double, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createLaplacianFilter(int, int, int, double, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
 
-Ptr<Filter> cv::gpu::createSeparableLinearFilter(int, int, InputArray, InputArray, Point, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createSeparableLinearFilter(int, int, InputArray, InputArray, Point, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
 
-Ptr<Filter> cv::gpu::createDerivFilter(int, int, int, int, int, bool, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
-Ptr<Filter> cv::gpu::createSobelFilter(int, int, int, int, int, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
-Ptr<Filter> cv::gpu::createScharrFilter(int, int, int, int, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createDerivFilter(int, int, int, int, int, bool, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createSobelFilter(int, int, int, int, int, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createScharrFilter(int, int, int, int, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
 
-Ptr<Filter> cv::gpu::createGaussianFilter(int, int, Size, double, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createGaussianFilter(int, int, Size, double, double, int, int) { throw_no_cuda(); return Ptr<Filter>(); }
 
-Ptr<Filter> cv::gpu::createMorphologyFilter(int, int, InputArray, Point, int) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createMorphologyFilter(int, int, InputArray, Point, int) { throw_no_cuda(); return Ptr<Filter>(); }
 
-Ptr<Filter> cv::gpu::createBoxMaxFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
-Ptr<Filter> cv::gpu::createBoxMinFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createBoxMaxFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createBoxMinFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
 
-Ptr<Filter> cv::gpu::createRowSumFilter(int, int, int, int, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
-Ptr<Filter> cv::gpu::createColumnSumFilter(int, int, int, int, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createRowSumFilter(int, int, int, int, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
+Ptr<Filter> cv::cuda::createColumnSumFilter(int, int, int, int, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
 
 #else
 
@@ -131,7 +131,7 @@ namespace
         GpuMat src = _src.getGpuMat();
         CV_Assert( src.type() == type_ );
 
-        gpu::copyMakeBorder(src, srcBorder_, ksize_.height, ksize_.height, ksize_.width, ksize_.width, borderMode_, borderVal_, _stream);
+        cuda::copyMakeBorder(src, srcBorder_, ksize_.height, ksize_.height, ksize_.width, ksize_.width, borderMode_, borderVal_, _stream);
 
         _dst.create(src.size(), src.type());
         GpuMat dst = _dst.getGpuMat();
@@ -162,7 +162,7 @@ namespace
     }
 }
 
-Ptr<Filter> cv::gpu::createBoxFilter(int srcType, int dstType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
+Ptr<Filter> cv::cuda::createBoxFilter(int srcType, int dstType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
 {
     if (dstType < 0)
         dstType = srcType;
@@ -175,7 +175,7 @@ Ptr<Filter> cv::gpu::createBoxFilter(int srcType, int dstType, Size ksize, Point
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Linear Filter
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T, typename D>
     void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
@@ -222,7 +222,7 @@ namespace
         Mat kernel32F;
         kernel.convertTo(kernel32F, CV_32F);
 
-        kernel_ = gpu::createContinuous(kernel.size(), CV_32FC1);
+        kernel_ = cuda::createContinuous(kernel.size(), CV_32FC1);
         kernel_.upload(kernel32F);
 
         normalizeAnchor(anchor_, kernel.size());
@@ -230,22 +230,22 @@ namespace
         switch (srcType)
         {
         case CV_8UC1:
-            func_ = cv::gpu::cudev::filter2D<uchar, uchar>;
+            func_ = cv::cuda::cudev::filter2D<uchar, uchar>;
             break;
         case CV_8UC4:
-            func_ = cv::gpu::cudev::filter2D<uchar4, uchar4>;
+            func_ = cv::cuda::cudev::filter2D<uchar4, uchar4>;
             break;
         case CV_16UC1:
-            func_ = cv::gpu::cudev::filter2D<ushort, ushort>;
+            func_ = cv::cuda::cudev::filter2D<ushort, ushort>;
             break;
         case CV_16UC4:
-            func_ = cv::gpu::cudev::filter2D<ushort4, ushort4>;
+            func_ = cv::cuda::cudev::filter2D<ushort4, ushort4>;
             break;
         case CV_32FC1:
-            func_ = cv::gpu::cudev::filter2D<float, float>;
+            func_ = cv::cuda::cudev::filter2D<float, float>;
             break;
         case CV_32FC4:
-            func_ = cv::gpu::cudev::filter2D<float4, float4>;
+            func_ = cv::cuda::cudev::filter2D<float4, float4>;
             break;
         }
     }
@@ -270,7 +270,7 @@ namespace
     }
 }
 
-Ptr<Filter> cv::gpu::createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal)
+Ptr<Filter> cv::cuda::createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal)
 {
     if (dstType < 0)
         dstType = srcType;
@@ -283,7 +283,7 @@ Ptr<Filter> cv::gpu::createLinearFilter(int srcType, int dstType, InputArray ker
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Laplacian Filter
 
-Ptr<Filter> cv::gpu::createLaplacianFilter(int srcType, int dstType, int ksize, double scale, int borderMode, Scalar borderVal)
+Ptr<Filter> cv::cuda::createLaplacianFilter(int srcType, int dstType, int ksize, double scale, int borderMode, Scalar borderVal)
 {
     CV_Assert( ksize == 1 || ksize == 3 );
 
@@ -297,7 +297,7 @@ Ptr<Filter> cv::gpu::createLaplacianFilter(int srcType, int dstType, int ksize,
     if (scale != 1)
         kernel *= scale;
 
-    return gpu::createLinearFilter(srcType, dstType, kernel, Point(-1,-1), borderMode, borderVal);
+    return cuda::createLinearFilter(srcType, dstType, kernel, Point(-1,-1), borderMode, borderVal);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -418,7 +418,7 @@ namespace
     }
 }
 
-Ptr<Filter> cv::gpu::createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel, Point anchor, int rowBorderMode, int columnBorderMode)
+Ptr<Filter> cv::cuda::createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel, Point anchor, int rowBorderMode, int columnBorderMode)
 {
     if (dstType < 0)
         dstType = srcType;
@@ -434,7 +434,7 @@ Ptr<Filter> cv::gpu::createSeparableLinearFilter(int srcType, int dstType, Input
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Deriv Filter
 
-Ptr<Filter> cv::gpu::createDerivFilter(int srcType, int dstType, int dx, int dy, int ksize, bool normalize, double scale, int rowBorderMode, int columnBorderMode)
+Ptr<Filter> cv::cuda::createDerivFilter(int srcType, int dstType, int dx, int dy, int ksize, bool normalize, double scale, int rowBorderMode, int columnBorderMode)
 {
     Mat kx, ky;
     getDerivKernels(kx, ky, dx, dy, ksize, normalize, CV_32F);
@@ -449,23 +449,23 @@ Ptr<Filter> cv::gpu::createDerivFilter(int srcType, int dstType, int dx, int dy,
             ky *= scale;
     }
 
-    return gpu::createSeparableLinearFilter(srcType, dstType, kx, ky, Point(-1, -1), rowBorderMode, columnBorderMode);
+    return cuda::createSeparableLinearFilter(srcType, dstType, kx, ky, Point(-1, -1), rowBorderMode, columnBorderMode);
 }
 
-Ptr<Filter> cv::gpu::createSobelFilter(int srcType, int dstType, int dx, int dy, int ksize, double scale, int rowBorderMode, int columnBorderMode)
+Ptr<Filter> cv::cuda::createSobelFilter(int srcType, int dstType, int dx, int dy, int ksize, double scale, int rowBorderMode, int columnBorderMode)
 {
-    return gpu::createDerivFilter(srcType, dstType, dx, dy, ksize, false, scale, rowBorderMode, columnBorderMode);
+    return cuda::createDerivFilter(srcType, dstType, dx, dy, ksize, false, scale, rowBorderMode, columnBorderMode);
 }
 
-Ptr<Filter> cv::gpu::createScharrFilter(int srcType, int dstType, int dx, int dy, double scale, int rowBorderMode, int columnBorderMode)
+Ptr<Filter> cv::cuda::createScharrFilter(int srcType, int dstType, int dx, int dy, double scale, int rowBorderMode, int columnBorderMode)
 {
-    return gpu::createDerivFilter(srcType, dstType, dx, dy, -1, false, scale, rowBorderMode, columnBorderMode);
+    return cuda::createDerivFilter(srcType, dstType, dx, dy, -1, false, scale, rowBorderMode, columnBorderMode);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Gaussian Filter
 
-Ptr<Filter> cv::gpu::createGaussianFilter(int srcType, int dstType, Size ksize, double sigma1, double sigma2, int rowBorderMode, int columnBorderMode)
+Ptr<Filter> cv::cuda::createGaussianFilter(int srcType, int dstType, Size ksize, double sigma1, double sigma2, int rowBorderMode, int columnBorderMode)
 {
     const int depth = CV_MAT_DEPTH(srcType);
 
@@ -557,7 +557,7 @@ namespace
         Mat kernel8U;
         kernel.convertTo(kernel8U, CV_8U);
 
-        kernel_ = gpu::createContinuous(kernel.size(), CV_8UC1);
+        kernel_ = cuda::createContinuous(kernel.size(), CV_8UC1);
         kernel_.upload(kernel8U);
 
         func_ = funcs[op][CV_MAT_CN(srcType)];
@@ -569,7 +569,7 @@ namespace
         CV_Assert( src.type() == type_ );
 
         Size ksize = kernel_.size();
-        gpu::copyMakeBorder(src, srcBorder_, ksize.height, ksize.height, ksize.width, ksize.width, BORDER_DEFAULT, Scalar(), _stream);
+        cuda::copyMakeBorder(src, srcBorder_, ksize.height, ksize.height, ksize.width, ksize.width, BORDER_DEFAULT, Scalar(), _stream);
 
         GpuMat srcRoi = srcBorder_(Rect(ksize.width, ksize.height, src.cols, src.rows));
 
@@ -623,14 +623,14 @@ namespace
         MorphologyExFilter(int srcType, InputArray kernel, Point anchor, int iterations);
 
     protected:
-        Ptr<gpu::Filter> erodeFilter_, dilateFilter_;
+        Ptr<cuda::Filter> erodeFilter_, dilateFilter_;
         GpuMat buf_;
     };
 
     MorphologyExFilter::MorphologyExFilter(int srcType, InputArray kernel, Point anchor, int iterations)
     {
-        erodeFilter_ = gpu::createMorphologyFilter(MORPH_ERODE, srcType, kernel, anchor, iterations);
-        dilateFilter_ = gpu::createMorphologyFilter(MORPH_DILATE, srcType, kernel, anchor, iterations);
+        erodeFilter_ = cuda::createMorphologyFilter(MORPH_ERODE, srcType, kernel, anchor, iterations);
+        dilateFilter_ = cuda::createMorphologyFilter(MORPH_DILATE, srcType, kernel, anchor, iterations);
     }
 
     // MORPH_OPEN
@@ -694,7 +694,7 @@ namespace
     {
         erodeFilter_->apply(src, buf_, stream);
         dilateFilter_->apply(src, dst, stream);
-        gpu::subtract(dst, buf_, dst, noArray(), -1, stream);
+        cuda::subtract(dst, buf_, dst, noArray(), -1, stream);
     }
 
     // MORPH_TOPHAT
@@ -716,7 +716,7 @@ namespace
     {
         erodeFilter_->apply(src, dst, stream);
         dilateFilter_->apply(dst, buf_, stream);
-        gpu::subtract(src, buf_, dst, noArray(), -1, stream);
+        cuda::subtract(src, buf_, dst, noArray(), -1, stream);
     }
 
     // MORPH_BLACKHAT
@@ -738,11 +738,11 @@ namespace
     {
         dilateFilter_->apply(src, dst, stream);
         erodeFilter_->apply(dst, buf_, stream);
-        gpu::subtract(buf_, src, dst, noArray(), -1, stream);
+        cuda::subtract(buf_, src, dst, noArray(), -1, stream);
     }
 }
 
-Ptr<Filter> cv::gpu::createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor, int iterations)
+Ptr<Filter> cv::cuda::createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor, int iterations)
 {
     switch( op )
     {
@@ -830,7 +830,7 @@ namespace
         GpuMat src = _src.getGpuMat();
         CV_Assert( src.type() == type_ );
 
-        gpu::copyMakeBorder(src, srcBorder_, ksize_.height, ksize_.height, ksize_.width, ksize_.width, borderMode_, borderVal_, _stream);
+        cuda::copyMakeBorder(src, srcBorder_, ksize_.height, ksize_.height, ksize_.width, ksize_.width, borderMode_, borderVal_, _stream);
 
         _dst.create(src.size(), src.type());
         GpuMat dst = _dst.getGpuMat();
@@ -860,12 +860,12 @@ namespace
     }
 }
 
-Ptr<Filter> cv::gpu::createBoxMaxFilter(int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
+Ptr<Filter> cv::cuda::createBoxMaxFilter(int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
 {
     return new NPPRankFilter(RANK_MAX, srcType, ksize, anchor, borderMode, borderVal);
 }
 
-Ptr<Filter> cv::gpu::createBoxMinFilter(int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
+Ptr<Filter> cv::cuda::createBoxMinFilter(int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal)
 {
     return new NPPRankFilter(RANK_MIN, srcType, ksize, anchor, borderMode, borderVal);
 }
@@ -906,7 +906,7 @@ namespace
         GpuMat src = _src.getGpuMat();
         CV_Assert( src.type() == srcType_ );
 
-        gpu::copyMakeBorder(src, srcBorder_, 0, 0, ksize_, ksize_, borderMode_, borderVal_, _stream);
+        cuda::copyMakeBorder(src, srcBorder_, 0, 0, ksize_, ksize_, borderMode_, borderVal_, _stream);
 
         _dst.create(src.size(), dstType_);
         GpuMat dst = _dst.getGpuMat();
@@ -929,7 +929,7 @@ namespace
     }
 }
 
-Ptr<Filter> cv::gpu::createRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal)
+Ptr<Filter> cv::cuda::createRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal)
 {
     return new NppRowSumFilter(srcType, dstType, ksize, anchor, borderMode, borderVal);
 }
@@ -967,7 +967,7 @@ namespace
         GpuMat src = _src.getGpuMat();
         CV_Assert( src.type() == srcType_ );
 
-        gpu::copyMakeBorder(src, srcBorder_, ksize_, ksize_, 0, 0, borderMode_, borderVal_, _stream);
+        cuda::copyMakeBorder(src, srcBorder_, ksize_, ksize_, 0, 0, borderMode_, borderVal_, _stream);
 
         _dst.create(src.size(), dstType_);
         GpuMat dst = _dst.getGpuMat();
@@ -990,7 +990,7 @@ namespace
     }
 }
 
-Ptr<Filter> cv::gpu::createColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal)
+Ptr<Filter> cv::cuda::createColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal)
 {
     return new NppColumnSumFilter(srcType, dstType, ksize, anchor, borderMode, borderVal);
 }
diff --git a/modules/gpufilters/test/test_filters.cpp b/modules/gpufilters/test/test_filters.cpp
index 03bea05e6d..cff71ae465 100644
--- a/modules/gpufilters/test/test_filters.cpp
+++ b/modules/gpufilters/test/test_filters.cpp
@@ -70,9 +70,9 @@ namespace
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // Blur
 
-PARAM_TEST_CASE(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi)
+PARAM_TEST_CASE(Blur, cv::cuda::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     cv::Size ksize;
@@ -90,7 +90,7 @@ PARAM_TEST_CASE(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, Bor
         borderType = GET_PARAM(5);
         useRoi = GET_PARAM(6);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -98,9 +98,9 @@ GPU_TEST_P(Blur, Accuracy)
 {
     cv::Mat src = randomMat(size, type);
 
-    cv::Ptr<cv::gpu::Filter> blurFilter = cv::gpu::createBoxFilter(src.type(), -1, ksize, anchor, borderType);
+    cv::Ptr<cv::cuda::Filter> blurFilter = cv::cuda::createBoxFilter(src.type(), -1, ksize, anchor, borderType);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
     blurFilter->apply(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
@@ -121,9 +121,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Blur, testing::Combine(
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // Filter2D
 
-PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi)
+PARAM_TEST_CASE(Filter2D, cv::cuda::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     cv::Size ksize;
@@ -141,7 +141,7 @@ PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor,
         borderType = GET_PARAM(5);
         useRoi = GET_PARAM(6);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -150,9 +150,9 @@ GPU_TEST_P(Filter2D, Accuracy)
     cv::Mat src = randomMat(size, type);
     cv::Mat kernel = randomMat(cv::Size(ksize.width, ksize.height), CV_32FC1, 0.0, 1.0);
 
-    cv::Ptr<cv::gpu::Filter> filter2D = cv::gpu::createLinearFilter(src.type(), -1, kernel, anchor, borderType);
+    cv::Ptr<cv::cuda::Filter> filter2D = cv::cuda::createLinearFilter(src.type(), -1, kernel, anchor, borderType);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
     filter2D->apply(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
@@ -173,9 +173,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Filter2D, testing::Combine(
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // Laplacian
 
-PARAM_TEST_CASE(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, UseRoi)
+PARAM_TEST_CASE(Laplacian, cv::cuda::DeviceInfo, cv::Size, MatType, KSize, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     cv::Size ksize;
@@ -189,7 +189,7 @@ PARAM_TEST_CASE(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, UseRoi
         ksize = GET_PARAM(3);
         useRoi = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -197,9 +197,9 @@ GPU_TEST_P(Laplacian, Accuracy)
 {
     cv::Mat src = randomMat(size, type);
 
-    cv::Ptr<cv::gpu::Filter> laplacian = cv::gpu::createLaplacianFilter(src.type(), -1, ksize.width);
+    cv::Ptr<cv::cuda::Filter> laplacian = cv::cuda::createLaplacianFilter(src.type(), -1, ksize.width);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
     laplacian->apply(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
@@ -218,9 +218,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Laplacian, testing::Combine(
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // SeparableLinearFilter
 
-PARAM_TEST_CASE(SeparableLinearFilter, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, KSize, Anchor, BorderType, UseRoi)
+PARAM_TEST_CASE(SeparableLinearFilter, cv::cuda::DeviceInfo, cv::Size, MatDepth, Channels, KSize, Anchor, BorderType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     int cn;
@@ -242,7 +242,7 @@ PARAM_TEST_CASE(SeparableLinearFilter, cv::gpu::DeviceInfo, cv::Size, MatDepth,
         borderType = GET_PARAM(6);
         useRoi = GET_PARAM(7);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         type = CV_MAKE_TYPE(depth, cn);
     }
@@ -254,9 +254,9 @@ GPU_TEST_P(SeparableLinearFilter, Accuracy)
     cv::Mat rowKernel = randomMat(Size(ksize.width, 1), CV_32FC1, 0.0, 1.0);
     cv::Mat columnKernel = randomMat(Size(ksize.height, 1), CV_32FC1, 0.0, 1.0);
 
-    cv::Ptr<cv::gpu::Filter> filter = cv::gpu::createSeparableLinearFilter(src.type(), -1, rowKernel, columnKernel, anchor, borderType);
+    cv::Ptr<cv::cuda::Filter> filter = cv::cuda::createSeparableLinearFilter(src.type(), -1, rowKernel, columnKernel, anchor, borderType);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
     filter->apply(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
@@ -287,9 +287,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, SeparableLinearFilter, testing::Combine(
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // Sobel
 
-PARAM_TEST_CASE(Sobel, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, KSize, Deriv_X, Deriv_Y, BorderType, UseRoi)
+PARAM_TEST_CASE(Sobel, cv::cuda::DeviceInfo, cv::Size, MatDepth, Channels, KSize, Deriv_X, Deriv_Y, BorderType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     int cn;
@@ -313,7 +313,7 @@ PARAM_TEST_CASE(Sobel, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, KSize,
         borderType = GET_PARAM(7);
         useRoi = GET_PARAM(8);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         type = CV_MAKE_TYPE(depth, cn);
     }
@@ -326,9 +326,9 @@ GPU_TEST_P(Sobel, Accuracy)
 
     cv::Mat src = randomMat(size, type);
 
-    cv::Ptr<cv::gpu::Filter> sobel = cv::gpu::createSobelFilter(src.type(), -1, dx, dy, ksize.width, 1.0, borderType);
+    cv::Ptr<cv::cuda::Filter> sobel = cv::cuda::createSobelFilter(src.type(), -1, dx, dy, ksize.width, 1.0, borderType);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
     sobel->apply(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
@@ -354,9 +354,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Sobel, testing::Combine(
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // Scharr
 
-PARAM_TEST_CASE(Scharr, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, Deriv_X, Deriv_Y, BorderType, UseRoi)
+PARAM_TEST_CASE(Scharr, cv::cuda::DeviceInfo, cv::Size, MatDepth, Channels, Deriv_X, Deriv_Y, BorderType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     int cn;
@@ -378,7 +378,7 @@ PARAM_TEST_CASE(Scharr, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, Deriv
         borderType = GET_PARAM(6);
         useRoi = GET_PARAM(7);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         type = CV_MAKE_TYPE(depth, cn);
     }
@@ -391,9 +391,9 @@ GPU_TEST_P(Scharr, Accuracy)
 
     cv::Mat src = randomMat(size, type);
 
-    cv::Ptr<cv::gpu::Filter> scharr = cv::gpu::createScharrFilter(src.type(), -1, dx, dy, 1.0, borderType);
+    cv::Ptr<cv::cuda::Filter> scharr = cv::cuda::createScharrFilter(src.type(), -1, dx, dy, 1.0, borderType);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
     scharr->apply(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
@@ -418,9 +418,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Scharr, testing::Combine(
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // GaussianBlur
 
-PARAM_TEST_CASE(GaussianBlur, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, KSize, BorderType, UseRoi)
+PARAM_TEST_CASE(GaussianBlur, cv::cuda::DeviceInfo, cv::Size, MatDepth, Channels, KSize, BorderType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     int cn;
@@ -440,7 +440,7 @@ PARAM_TEST_CASE(GaussianBlur, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels,
         borderType = GET_PARAM(5);
         useRoi = GET_PARAM(6);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         type = CV_MAKE_TYPE(depth, cn);
     }
@@ -452,9 +452,9 @@ GPU_TEST_P(GaussianBlur, Accuracy)
     double sigma1 = randomDouble(0.1, 1.0);
     double sigma2 = randomDouble(0.1, 1.0);
 
-    cv::Ptr<cv::gpu::Filter> gauss = cv::gpu::createGaussianFilter(src.type(), -1, ksize, sigma1, sigma2, borderType);
+    cv::Ptr<cv::cuda::Filter> gauss = cv::cuda::createGaussianFilter(src.type(), -1, ksize, sigma1, sigma2, borderType);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
     gauss->apply(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
@@ -492,9 +492,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, GaussianBlur, testing::Combine(
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // Erode
 
-PARAM_TEST_CASE(Erode, cv::gpu::DeviceInfo, cv::Size, MatType, Anchor, Iterations, UseRoi)
+PARAM_TEST_CASE(Erode, cv::cuda::DeviceInfo, cv::Size, MatType, Anchor, Iterations, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     cv::Point anchor;
@@ -510,7 +510,7 @@ PARAM_TEST_CASE(Erode, cv::gpu::DeviceInfo, cv::Size, MatType, Anchor, Iteration
         iterations = GET_PARAM(4);
         useRoi = GET_PARAM(5);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -519,9 +519,9 @@ GPU_TEST_P(Erode, Accuracy)
     cv::Mat src = randomMat(size, type);
     cv::Mat kernel = cv::Mat::ones(3, 3, CV_8U);
 
-    cv::Ptr<cv::gpu::Filter> erode = cv::gpu::createMorphologyFilter(cv::MORPH_ERODE, src.type(), kernel, anchor, iterations);
+    cv::Ptr<cv::cuda::Filter> erode = cv::cuda::createMorphologyFilter(cv::MORPH_ERODE, src.type(), kernel, anchor, iterations);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
     erode->apply(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
@@ -543,9 +543,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Erode, testing::Combine(
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // Dilate
 
-PARAM_TEST_CASE(Dilate, cv::gpu::DeviceInfo, cv::Size, MatType, Anchor, Iterations, UseRoi)
+PARAM_TEST_CASE(Dilate, cv::cuda::DeviceInfo, cv::Size, MatType, Anchor, Iterations, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     cv::Point anchor;
@@ -561,7 +561,7 @@ PARAM_TEST_CASE(Dilate, cv::gpu::DeviceInfo, cv::Size, MatType, Anchor, Iteratio
         iterations = GET_PARAM(4);
         useRoi = GET_PARAM(5);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -570,9 +570,9 @@ GPU_TEST_P(Dilate, Accuracy)
     cv::Mat src = randomMat(size, type);
     cv::Mat kernel = cv::Mat::ones(3, 3, CV_8U);
 
-    cv::Ptr<cv::gpu::Filter> dilate = cv::gpu::createMorphologyFilter(cv::MORPH_DILATE, src.type(), kernel, anchor, iterations);
+    cv::Ptr<cv::cuda::Filter> dilate = cv::cuda::createMorphologyFilter(cv::MORPH_DILATE, src.type(), kernel, anchor, iterations);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
     dilate->apply(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
@@ -596,9 +596,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Dilate, testing::Combine(
 
 CV_ENUM(MorphOp, MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BLACKHAT)
 
-PARAM_TEST_CASE(MorphEx, cv::gpu::DeviceInfo, cv::Size, MatType, MorphOp, Anchor, Iterations, UseRoi)
+PARAM_TEST_CASE(MorphEx, cv::cuda::DeviceInfo, cv::Size, MatType, MorphOp, Anchor, Iterations, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     int morphOp;
@@ -616,7 +616,7 @@ PARAM_TEST_CASE(MorphEx, cv::gpu::DeviceInfo, cv::Size, MatType, MorphOp, Anchor
         iterations = GET_PARAM(5);
         useRoi = GET_PARAM(6);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -625,9 +625,9 @@ GPU_TEST_P(MorphEx, Accuracy)
     cv::Mat src = randomMat(size, type);
     cv::Mat kernel = cv::Mat::ones(3, 3, CV_8U);
 
-    cv::Ptr<cv::gpu::Filter> morph = cv::gpu::createMorphologyFilter(morphOp, src.type(), kernel, anchor, iterations);
+    cv::Ptr<cv::cuda::Filter> morph = cv::cuda::createMorphologyFilter(morphOp, src.type(), kernel, anchor, iterations);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
     morph->apply(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
diff --git a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
index f0a0f1260a..edeb8a53df 100644
--- a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
+++ b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp
@@ -50,7 +50,7 @@
 #include "opencv2/core/gpu.hpp"
 #include "opencv2/imgproc.hpp"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 /////////////////////////// Color Processing ///////////////////////////
 
@@ -106,7 +106,7 @@ CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, InputOutputArray b
 static inline void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
 {
     GpuMat buf;
-    gpu::equalizeHist(src, dst, buf, stream);
+    cuda::equalizeHist(src, dst, buf, stream);
 }
 
 class CV_EXPORTS CLAHE : public cv::CLAHE
@@ -115,7 +115,7 @@ public:
     using cv::CLAHE::apply;
     virtual void apply(InputArray src, OutputArray dst, Stream& stream) = 0;
 };
-CV_EXPORTS Ptr<gpu::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
+CV_EXPORTS Ptr<cuda::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8));
 
 //! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type.
 CV_EXPORTS void evenLevels(OutputArray levels, int nLevels, int lowerLevel, int upperLevel);
@@ -128,7 +128,7 @@ CV_EXPORTS void histEven(InputArray src, OutputArray hist, InputOutputArray buf,
 static inline void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
 {
     GpuMat buf;
-    gpu::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
+    cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
 }
 
 //! Calculates histogram with evenly distributed bins for four-channel source.
@@ -140,7 +140,7 @@ CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], InputOutputArray buf, i
 static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null())
 {
     GpuMat buf;
-    gpu::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
+    cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
 }
 
 //! Calculates histogram with bins determined by levels array.
@@ -152,7 +152,7 @@ CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, I
 static inline void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null())
 {
     GpuMat buf;
-    gpu::histRange(src, hist, levels, buf, stream);
+    cuda::histRange(src, hist, levels, buf, stream);
 }
 
 //! Calculates histogram with bins determined by levels array.
@@ -165,7 +165,7 @@ CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4]
 static inline void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null())
 {
     GpuMat buf;
-    gpu::histRange(src, hist, levels, buf, stream);
+    cuda::histRange(src, hist, levels, buf, stream);
 }
 
 //////////////////////////////// Canny ////////////////////////////////
@@ -357,6 +357,6 @@ CV_EXPORTS void bilateralFilter(InputArray src, OutputArray dst, int kernel_size
 CV_EXPORTS void blendLinear(InputArray img1, InputArray img2, InputArray weights1, InputArray weights2,
                             OutputArray result, Stream& stream = Stream::Null());
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 #endif /* __OPENCV_GPUIMGPROC_HPP__ */
diff --git a/modules/gpuimgproc/perf/perf_bilateral_filter.cpp b/modules/gpuimgproc/perf/perf_bilateral_filter.cpp
index 1787fdc096..6805bf91bb 100644
--- a/modules/gpuimgproc/perf/perf_bilateral_filter.cpp
+++ b/modules/gpuimgproc/perf/perf_bilateral_filter.cpp
@@ -75,10 +75,10 @@ PERF_TEST_P(Sz_Depth_Cn_KernelSz, BilateralFilter,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::bilateralFilter(d_src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
+        TEST_CYCLE() cv::cuda::bilateralFilter(d_src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
 
         GPU_SANITY_CHECK(dst);
     }
diff --git a/modules/gpuimgproc/perf/perf_blend.cpp b/modules/gpuimgproc/perf/perf_blend.cpp
index 5d43817684..1c14f1f352 100644
--- a/modules/gpuimgproc/perf/perf_blend.cpp
+++ b/modules/gpuimgproc/perf/perf_blend.cpp
@@ -69,13 +69,13 @@ PERF_TEST_P(Sz_Depth_Cn, BlendLinear,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_img1(img1);
-        const cv::gpu::GpuMat d_img2(img2);
-        const cv::gpu::GpuMat d_weights1(weights1);
-        const cv::gpu::GpuMat d_weights2(weights2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_img1(img1);
+        const cv::cuda::GpuMat d_img2(img2);
+        const cv::cuda::GpuMat d_weights1(weights1);
+        const cv::cuda::GpuMat d_weights2(weights2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::blendLinear(d_img1, d_img2, d_weights1, d_weights2, dst);
+        TEST_CYCLE() cv::cuda::blendLinear(d_img1, d_img2, d_weights1, d_weights2, dst);
 
         GPU_SANITY_CHECK(dst);
     }
diff --git a/modules/gpuimgproc/perf/perf_canny.cpp b/modules/gpuimgproc/perf/perf_canny.cpp
index 2bbf70a496..4716d10374 100644
--- a/modules/gpuimgproc/perf/perf_canny.cpp
+++ b/modules/gpuimgproc/perf/perf_canny.cpp
@@ -68,10 +68,10 @@ PERF_TEST_P(Image_AppertureSz_L2gradient, Canny,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_image(image);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_image(image);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::CannyEdgeDetector> canny = cv::gpu::createCannyEdgeDetector(low_thresh, high_thresh, apperture_size, useL2gradient);
+        cv::Ptr<cv::cuda::CannyEdgeDetector> canny = cv::cuda::createCannyEdgeDetector(low_thresh, high_thresh, apperture_size, useL2gradient);
 
         TEST_CYCLE() canny->detect(d_image, dst);
 
diff --git a/modules/gpuimgproc/perf/perf_color.cpp b/modules/gpuimgproc/perf/perf_color.cpp
index 1df324816e..72cb4001af 100644
--- a/modules/gpuimgproc/perf/perf_color.cpp
+++ b/modules/gpuimgproc/perf/perf_color.cpp
@@ -85,10 +85,10 @@ PERF_TEST_P(Sz_Depth_Code, CvtColor,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::cvtColor(d_src, dst, info.code, info.dcn);
+        TEST_CYCLE() cv::cuda::cvtColor(d_src, dst, info.code, info.dcn);
 
         GPU_SANITY_CHECK(dst, 1e-4);
     }
@@ -124,10 +124,10 @@ PERF_TEST_P(Sz_Depth_Code, CvtColorBayer,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::cvtColor(d_src, dst, info.code, info.dcn);
+        TEST_CYCLE() cv::cuda::cvtColor(d_src, dst, info.code, info.dcn);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -147,8 +147,8 @@ PERF_TEST_P(Sz_Depth_Code, CvtColorBayer,
 CV_ENUM(DemosaicingCode,
         cv::COLOR_BayerBG2BGR, cv::COLOR_BayerGB2BGR, cv::COLOR_BayerRG2BGR, cv::COLOR_BayerGR2BGR,
         cv::COLOR_BayerBG2GRAY, cv::COLOR_BayerGB2GRAY, cv::COLOR_BayerRG2GRAY, cv::COLOR_BayerGR2GRAY,
-        cv::gpu::COLOR_BayerBG2BGR_MHT, cv::gpu::COLOR_BayerGB2BGR_MHT, cv::gpu::COLOR_BayerRG2BGR_MHT, cv::gpu::COLOR_BayerGR2BGR_MHT,
-        cv::gpu::COLOR_BayerBG2GRAY_MHT, cv::gpu::COLOR_BayerGB2GRAY_MHT, cv::gpu::COLOR_BayerRG2GRAY_MHT, cv::gpu::COLOR_BayerGR2GRAY_MHT)
+        cv::cuda::COLOR_BayerBG2BGR_MHT, cv::cuda::COLOR_BayerGB2BGR_MHT, cv::cuda::COLOR_BayerRG2BGR_MHT, cv::cuda::COLOR_BayerGR2BGR_MHT,
+        cv::cuda::COLOR_BayerBG2GRAY_MHT, cv::cuda::COLOR_BayerGB2GRAY_MHT, cv::cuda::COLOR_BayerRG2GRAY_MHT, cv::cuda::COLOR_BayerGR2GRAY_MHT)
 
 DEF_PARAM_TEST(Sz_Code, cv::Size, DemosaicingCode);
 
@@ -164,10 +164,10 @@ PERF_TEST_P(Sz_Code, Demosaicing,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::demosaicing(d_src, dst, code);
+        TEST_CYCLE() cv::cuda::demosaicing(d_src, dst, code);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -203,9 +203,9 @@ PERF_TEST_P(Sz, SwapChannels,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat dst(src);
+        cv::cuda::GpuMat dst(src);
 
-        TEST_CYCLE() cv::gpu::swapChannels(dst, dstOrder);
+        TEST_CYCLE() cv::cuda::swapChannels(dst, dstOrder);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -218,7 +218,7 @@ PERF_TEST_P(Sz, SwapChannels,
 //////////////////////////////////////////////////////////////////////
 // AlphaComp
 
-CV_ENUM(AlphaOp, cv::gpu::ALPHA_OVER, cv::gpu::ALPHA_IN, cv::gpu::ALPHA_OUT, cv::gpu::ALPHA_ATOP, cv::gpu::ALPHA_XOR, cv::gpu::ALPHA_PLUS, cv::gpu::ALPHA_OVER_PREMUL, cv::gpu::ALPHA_IN_PREMUL, cv::gpu::ALPHA_OUT_PREMUL, cv::gpu::ALPHA_ATOP_PREMUL, cv::gpu::ALPHA_XOR_PREMUL, cv::gpu::ALPHA_PLUS_PREMUL, cv::gpu::ALPHA_PREMUL)
+CV_ENUM(AlphaOp, cv::cuda::ALPHA_OVER, cv::cuda::ALPHA_IN, cv::cuda::ALPHA_OUT, cv::cuda::ALPHA_ATOP, cv::cuda::ALPHA_XOR, cv::cuda::ALPHA_PLUS, cv::cuda::ALPHA_OVER_PREMUL, cv::cuda::ALPHA_IN_PREMUL, cv::cuda::ALPHA_OUT_PREMUL, cv::cuda::ALPHA_ATOP_PREMUL, cv::cuda::ALPHA_XOR_PREMUL, cv::cuda::ALPHA_PLUS_PREMUL, cv::cuda::ALPHA_PREMUL)
 
 DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, AlphaOp);
 
@@ -237,11 +237,11 @@ PERF_TEST_P(Sz_Type_Op, AlphaComp,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_img1(img1);
-        const cv::gpu::GpuMat d_img2(img2);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_img1(img1);
+        const cv::cuda::GpuMat d_img2(img2);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::alphaComp(d_img1, d_img2, dst, alpha_op);
+        TEST_CYCLE() cv::cuda::alphaComp(d_img1, d_img2, dst, alpha_op);
 
         GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE);
     }
diff --git a/modules/gpuimgproc/perf/perf_corners.cpp b/modules/gpuimgproc/perf/perf_corners.cpp
index a0c1f8d30f..2a2445bc28 100644
--- a/modules/gpuimgproc/perf/perf_corners.cpp
+++ b/modules/gpuimgproc/perf/perf_corners.cpp
@@ -73,10 +73,10 @@ PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, CornerHarris,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_img(img);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::CornernessCriteria> harris = cv::gpu::createHarrisCorner(img.type(), blockSize, apertureSize, k, borderMode);
+        cv::Ptr<cv::cuda::CornernessCriteria> harris = cv::cuda::createHarrisCorner(img.type(), blockSize, apertureSize, k, borderMode);
 
         TEST_CYCLE() harris->compute(d_img, dst);
 
@@ -115,10 +115,10 @@ PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, CornerMinEigenVal,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_img(img);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::CornernessCriteria> minEigenVal = cv::gpu::createMinEigenValCorner(img.type(), blockSize, apertureSize, borderMode);
+        cv::Ptr<cv::cuda::CornernessCriteria> minEigenVal = cv::cuda::createMinEigenValCorner(img.type(), blockSize, apertureSize, borderMode);
 
         TEST_CYCLE() minEigenVal->compute(d_img, dst);
 
diff --git a/modules/gpuimgproc/perf/perf_gftt.cpp b/modules/gpuimgproc/perf/perf_gftt.cpp
index ed8d6ac16d..5b6f0f4d5c 100644
--- a/modules/gpuimgproc/perf/perf_gftt.cpp
+++ b/modules/gpuimgproc/perf/perf_gftt.cpp
@@ -66,10 +66,10 @@ PERF_TEST_P(Image_MinDistance, GoodFeaturesToTrack,
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::gpu::CornersDetector> d_detector = cv::gpu::createGoodFeaturesToTrackDetector(image.type(), maxCorners, qualityLevel, minDistance);
+        cv::Ptr<cv::cuda::CornersDetector> d_detector = cv::cuda::createGoodFeaturesToTrackDetector(image.type(), maxCorners, qualityLevel, minDistance);
 
-        const cv::gpu::GpuMat d_image(image);
-        cv::gpu::GpuMat pts;
+        const cv::cuda::GpuMat d_image(image);
+        cv::cuda::GpuMat pts;
 
         TEST_CYCLE() d_detector->detect(d_image, pts);
 
diff --git a/modules/gpuimgproc/perf/perf_histogram.cpp b/modules/gpuimgproc/perf/perf_histogram.cpp
index d8def54ff1..49560d339f 100644
--- a/modules/gpuimgproc/perf/perf_histogram.cpp
+++ b/modules/gpuimgproc/perf/perf_histogram.cpp
@@ -61,11 +61,11 @@ PERF_TEST_P(Sz_Depth, HistEvenC1,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
+        cv::cuda::GpuMat d_buf;
 
-        TEST_CYCLE() cv::gpu::histEven(d_src, dst, d_buf, 30, 0, 180);
+        TEST_CYCLE() cv::cuda::histEven(d_src, dst, d_buf, 30, 0, 180);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -104,11 +104,11 @@ PERF_TEST_P(Sz_Depth, HistEvenC4,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_hist[4];
-        cv::gpu::GpuMat d_buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat d_hist[4];
+        cv::cuda::GpuMat d_buf;
 
-        TEST_CYCLE() cv::gpu::histEven(d_src, d_hist, d_buf, histSize, lowerLevel, upperLevel);
+        TEST_CYCLE() cv::cuda::histEven(d_src, d_hist, d_buf, histSize, lowerLevel, upperLevel);
 
         cv::Mat cpu_hist0, cpu_hist1, cpu_hist2, cpu_hist3;
         d_hist[0].download(cpu_hist0);
@@ -139,10 +139,10 @@ PERF_TEST_P(Sz, CalcHist,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::calcHist(d_src, dst);
+        TEST_CYCLE() cv::cuda::calcHist(d_src, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -165,11 +165,11 @@ PERF_TEST_P(Sz, EqualizeHist,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
-        cv::gpu::GpuMat d_buf;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
+        cv::cuda::GpuMat d_buf;
 
-        TEST_CYCLE() cv::gpu::equalizeHist(d_src, dst, d_buf);
+        TEST_CYCLE() cv::cuda::equalizeHist(d_src, dst, d_buf);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -200,9 +200,9 @@ PERF_TEST_P(Sz_ClipLimit, CLAHE,
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::gpu::CLAHE> clahe = cv::gpu::createCLAHE(clipLimit);
-        cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        cv::Ptr<cv::cuda::CLAHE> clahe = cv::cuda::createCLAHE(clipLimit);
+        cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
         TEST_CYCLE() clahe->apply(d_src, dst);
 
diff --git a/modules/gpuimgproc/perf/perf_hough.cpp b/modules/gpuimgproc/perf/perf_hough.cpp
index cce8e7432e..3b62b86858 100644
--- a/modules/gpuimgproc/perf/perf_hough.cpp
+++ b/modules/gpuimgproc/perf/perf_hough.cpp
@@ -101,10 +101,10 @@ PERF_TEST_P(Sz, HoughLines,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_lines;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat d_lines;
 
-        cv::Ptr<cv::gpu::HoughLinesDetector> hough = cv::gpu::createHoughLinesDetector(rho, theta, threshold);
+        cv::Ptr<cv::cuda::HoughLinesDetector> hough = cv::cuda::createHoughLinesDetector(rho, theta, threshold);
 
         TEST_CYCLE() hough->detect(d_src, d_lines);
 
@@ -150,10 +150,10 @@ PERF_TEST_P(Image, HoughLinesP,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_mask(mask);
-        cv::gpu::GpuMat d_lines;
+        const cv::cuda::GpuMat d_mask(mask);
+        cv::cuda::GpuMat d_lines;
 
-        cv::Ptr<cv::gpu::HoughSegmentDetector> hough = cv::gpu::createHoughSegmentDetector(rho, theta, minLineLenght, maxLineGap);
+        cv::Ptr<cv::cuda::HoughSegmentDetector> hough = cv::cuda::createHoughSegmentDetector(rho, theta, minLineLenght, maxLineGap);
 
         TEST_CYCLE() hough->detect(d_mask, d_lines);
 
@@ -201,10 +201,10 @@ PERF_TEST_P(Sz_Dp_MinDist, HoughCircles,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat d_circles;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat d_circles;
 
-        cv::Ptr<cv::gpu::HoughCirclesDetector> houghCircles = cv::gpu::createHoughCirclesDetector(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
+        cv::Ptr<cv::cuda::HoughCirclesDetector> houghCircles = cv::cuda::createHoughCirclesDetector(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
 
         TEST_CYCLE() houghCircles->detect(d_src, d_circles);
 
@@ -248,14 +248,14 @@ PERF_TEST_P(Sz, GeneralizedHoughBallard, GPU_TYPICAL_MAT_SIZES)
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::GeneralizedHoughBallard> alg = cv::gpu::createGeneralizedHoughBallard();
+        cv::Ptr<cv::GeneralizedHoughBallard> alg = cv::cuda::createGeneralizedHoughBallard();
 
-        const cv::gpu::GpuMat d_edges(edges);
-        const cv::gpu::GpuMat d_dx(dx);
-        const cv::gpu::GpuMat d_dy(dy);
-        cv::gpu::GpuMat positions;
+        const cv::cuda::GpuMat d_edges(edges);
+        const cv::cuda::GpuMat d_dx(dx);
+        const cv::cuda::GpuMat d_dy(dy);
+        cv::cuda::GpuMat positions;
 
-        alg->setTemplate(cv::gpu::GpuMat(templ));
+        alg->setTemplate(cv::cuda::GpuMat(templ));
 
         TEST_CYCLE() alg->detect(d_edges, d_dx, d_dy, positions);
 
@@ -317,16 +317,16 @@ PERF_TEST_P(Sz, GeneralizedHoughGuil, GPU_TYPICAL_MAT_SIZES)
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::GeneralizedHoughGuil> alg = cv::gpu::createGeneralizedHoughGuil();
+        cv::Ptr<cv::GeneralizedHoughGuil> alg = cv::cuda::createGeneralizedHoughGuil();
         alg->setMaxAngle(90.0);
         alg->setAngleStep(2.0);
 
-        const cv::gpu::GpuMat d_edges(edges);
-        const cv::gpu::GpuMat d_dx(dx);
-        const cv::gpu::GpuMat d_dy(dy);
-        cv::gpu::GpuMat positions;
+        const cv::cuda::GpuMat d_edges(edges);
+        const cv::cuda::GpuMat d_dx(dx);
+        const cv::cuda::GpuMat d_dy(dy);
+        cv::cuda::GpuMat positions;
 
-        alg->setTemplate(cv::gpu::GpuMat(templ));
+        alg->setTemplate(cv::cuda::GpuMat(templ));
 
         TEST_CYCLE() alg->detect(d_edges, d_dx, d_dy, positions);
 
diff --git a/modules/gpuimgproc/perf/perf_match_template.cpp b/modules/gpuimgproc/perf/perf_match_template.cpp
index 35f36596c6..8ebae61a07 100644
--- a/modules/gpuimgproc/perf/perf_match_template.cpp
+++ b/modules/gpuimgproc/perf/perf_match_template.cpp
@@ -72,11 +72,11 @@ PERF_TEST_P(Sz_TemplateSz_Cn_Method, MatchTemplate8U,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_image(image);
-        const cv::gpu::GpuMat d_templ(templ);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_image(image);
+        const cv::cuda::GpuMat d_templ(templ);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(image.type(), method);
+        cv::Ptr<cv::cuda::TemplateMatching> alg = cv::cuda::createTemplateMatching(image.type(), method);
 
         TEST_CYCLE() alg->match(d_image, d_templ, dst);
 
@@ -114,11 +114,11 @@ PERF_TEST_P(Sz_TemplateSz_Cn_Method, MatchTemplate32F,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_image(image);
-        const cv::gpu::GpuMat d_templ(templ);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_image(image);
+        const cv::cuda::GpuMat d_templ(templ);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(image.type(), method);
+        cv::Ptr<cv::cuda::TemplateMatching> alg = cv::cuda::createTemplateMatching(image.type(), method);
 
         TEST_CYCLE() alg->match(d_image, d_templ, dst);
 
diff --git a/modules/gpuimgproc/perf/perf_mean_shift.cpp b/modules/gpuimgproc/perf/perf_mean_shift.cpp
index 0ac0b71c44..2c04b7eb93 100644
--- a/modules/gpuimgproc/perf/perf_mean_shift.cpp
+++ b/modules/gpuimgproc/perf/perf_mean_shift.cpp
@@ -67,10 +67,10 @@ PERF_TEST_P(Image, MeanShiftFiltering,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(rgba);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(rgba);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::meanShiftFiltering(d_src, dst, sp, sr);
+        TEST_CYCLE() cv::cuda::meanShiftFiltering(d_src, dst, sp, sr);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -103,11 +103,11 @@ PERF_TEST_P(Image, MeanShiftProc,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(rgba);
-        cv::gpu::GpuMat dstr;
-        cv::gpu::GpuMat dstsp;
+        const cv::cuda::GpuMat d_src(rgba);
+        cv::cuda::GpuMat dstr;
+        cv::cuda::GpuMat dstsp;
 
-        TEST_CYCLE() cv::gpu::meanShiftProc(d_src, dstr, dstsp, sp, sr);
+        TEST_CYCLE() cv::cuda::meanShiftProc(d_src, dstr, dstsp, sp, sr);
 
         GPU_SANITY_CHECK(dstr);
         GPU_SANITY_CHECK(dstsp);
@@ -138,10 +138,10 @@ PERF_TEST_P(Image, MeanShiftSegmentation,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(rgba);
+        const cv::cuda::GpuMat d_src(rgba);
         cv::Mat dst;
 
-        TEST_CYCLE() cv::gpu::meanShiftSegmentation(d_src, dst, sp, sr, minsize);
+        TEST_CYCLE() cv::cuda::meanShiftSegmentation(d_src, dst, sp, sr, minsize);
 
         GPU_SANITY_CHECK(dst);
     }
diff --git a/modules/gpuimgproc/src/bilateral_filter.cpp b/modules/gpuimgproc/src/bilateral_filter.cpp
index b9d0b811e4..e90a6ddc07 100644
--- a/modules/gpuimgproc/src/bilateral_filter.cpp
+++ b/modules/gpuimgproc/src/bilateral_filter.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::bilateralFilter(InputArray, OutputArray, int, float, float, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::bilateralFilter(InputArray, OutputArray, int, float, float, int, Stream&) { throw_no_cuda(); }
 
 #else
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -60,9 +60,9 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::bilateralFilter(InputArray _src, OutputArray _dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode, Stream& stream)
+void cv::cuda::bilateralFilter(InputArray _src, OutputArray _dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode, Stream& stream)
 {
-    using cv::gpu::cudev::imgproc::bilateral_filter_gpu;
+    using cv::cuda::cudev::imgproc::bilateral_filter_gpu;
 
     typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t s);
 
diff --git a/modules/gpuimgproc/src/blend.cpp b/modules/gpuimgproc/src/blend.cpp
index 71c72a7153..b1515a080c 100644
--- a/modules/gpuimgproc/src/blend.cpp
+++ b/modules/gpuimgproc/src/blend.cpp
@@ -43,18 +43,18 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::blendLinear(InputArray, InputArray, InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::blendLinear(InputArray, InputArray, InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
 #else
 
 ////////////////////////////////////////////////////////////////////////
 // blendLinear
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace blend
     {
@@ -65,9 +65,9 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-using namespace ::cv::gpu::cudev::blend;
+using namespace ::cv::cuda::cudev::blend;
 
-void cv::gpu::blendLinear(InputArray _img1, InputArray _img2, InputArray _weights1, InputArray _weights2,
+void cv::cuda::blendLinear(InputArray _img1, InputArray _img2, InputArray _weights1, InputArray _weights2,
                           OutputArray _result, Stream& stream)
 {
     GpuMat img1 = _img1.getGpuMat();
diff --git a/modules/gpuimgproc/src/canny.cpp b/modules/gpuimgproc/src/canny.cpp
index d6f5e6b032..7d7ae80c34 100644
--- a/modules/gpuimgproc/src/canny.cpp
+++ b/modules/gpuimgproc/src/canny.cpp
@@ -43,11 +43,11 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-Ptr<CannyEdgeDetector> cv::gpu::createCannyEdgeDetector(double, double, int, bool) { throw_no_cuda(); return Ptr<CannyEdgeDetector>(); }
+Ptr<CannyEdgeDetector> cv::cuda::createCannyEdgeDetector(double, double, int, bool) { throw_no_cuda(); return Ptr<CannyEdgeDetector>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -200,8 +200,8 @@ namespace
 #ifdef HAVE_OPENCV_GPUFILTERS
         if (apperture_size_ != 3 && apperture_size_ != old_apperture_size_)
         {
-            filterDX_ = gpu::createDerivFilter(CV_8UC1, CV_32S, 1, 0, apperture_size_, false, 1, BORDER_REPLICATE);
-            filterDY_ = gpu::createDerivFilter(CV_8UC1, CV_32S, 0, 1, apperture_size_, false, 1, BORDER_REPLICATE);
+            filterDX_ = cuda::createDerivFilter(CV_8UC1, CV_32S, 1, 0, apperture_size_, false, 1, BORDER_REPLICATE);
+            filterDY_ = cuda::createDerivFilter(CV_8UC1, CV_32S, 0, 1, apperture_size_, false, 1, BORDER_REPLICATE);
             old_apperture_size_ = apperture_size_;
         }
 #endif
@@ -226,7 +226,7 @@ namespace
     }
 }
 
-Ptr<CannyEdgeDetector> cv::gpu::createCannyEdgeDetector(double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
+Ptr<CannyEdgeDetector> cv::cuda::createCannyEdgeDetector(double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
 {
     return new CannyImpl(low_thresh, high_thresh, apperture_size, L2gradient);
 }
diff --git a/modules/gpuimgproc/src/color.cpp b/modules/gpuimgproc/src/color.cpp
index 3d714b6287..185fb10a3c 100644
--- a/modules/gpuimgproc/src/color.cpp
+++ b/modules/gpuimgproc/src/color.cpp
@@ -43,26 +43,26 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::cvtColor(InputArray, OutputArray, int, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::cvtColor(InputArray, OutputArray, int, int, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::demosaicing(InputArray, OutputArray, int, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::demosaicing(InputArray, OutputArray, int, int, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::swapChannels(InputOutputArray, const int[], Stream&) { throw_no_cuda(); }
+void cv::cuda::swapChannels(InputOutputArray, const int[], Stream&) { throw_no_cuda(); }
 
-void cv::gpu::gammaCorrection(InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
+void cv::cuda::gammaCorrection(InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::alphaComp(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::alphaComp(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
 
 
 #else /* !defined (HAVE_CUDA) */
 
 #include "cvt_color_internal.h"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
     namespace cudev
     {
         template <int cn>
@@ -75,7 +75,7 @@ namespace cv { namespace gpu {
     }
 }}
 
-using namespace ::cv::gpu::cudev;
+using namespace ::cv::cuda::cudev;
 
 namespace
 {
@@ -83,7 +83,7 @@ namespace
 
     void bgr_to_rgb(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[] = {bgr_to_rgb_8u, 0, bgr_to_rgb_16u, 0, 0, bgr_to_rgb_32f};
 
         GpuMat src = _src.getGpuMat();
@@ -99,7 +99,7 @@ namespace
 
     void bgr_to_bgra(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[] = {bgr_to_bgra_8u, 0, bgr_to_bgra_16u, 0, 0, bgr_to_bgra_32f};
 
         GpuMat src = _src.getGpuMat();
@@ -115,7 +115,7 @@ namespace
 
     void bgr_to_rgba(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[] = {bgr_to_rgba_8u, 0, bgr_to_rgba_16u, 0, 0, bgr_to_rgba_32f};
 
         GpuMat src = _src.getGpuMat();
@@ -131,7 +131,7 @@ namespace
 
     void bgra_to_bgr(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[] = {bgra_to_bgr_8u, 0, bgra_to_bgr_16u, 0, 0, bgra_to_bgr_32f};
 
         GpuMat src = _src.getGpuMat();
@@ -147,7 +147,7 @@ namespace
 
     void bgra_to_rgb(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[] = {bgra_to_rgb_8u, 0, bgra_to_rgb_16u, 0, 0, bgra_to_rgb_32f};
 
         GpuMat src = _src.getGpuMat();
@@ -163,7 +163,7 @@ namespace
 
     void bgra_to_rgba(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[] = {bgra_to_rgba_8u, 0, bgra_to_rgba_16u, 0, 0, bgra_to_rgba_32f};
 
         GpuMat src = _src.getGpuMat();
@@ -187,7 +187,7 @@ namespace
         _dst.create(src.size(), CV_8UC2);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgr_to_bgr555(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgr_to_bgr555(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgr_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -200,7 +200,7 @@ namespace
         _dst.create(src.size(), CV_8UC2);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgr_to_bgr565(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgr_to_bgr565(src, dst, StreamAccessor::getStream(stream));
     }
 
     void rgb_to_bgr555(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -213,7 +213,7 @@ namespace
         _dst.create(src.size(), CV_8UC2);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::rgb_to_bgr555(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::rgb_to_bgr555(src, dst, StreamAccessor::getStream(stream));
     }
 
     void rgb_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -226,7 +226,7 @@ namespace
         _dst.create(src.size(), CV_8UC2);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::rgb_to_bgr565(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::rgb_to_bgr565(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgra_to_bgr555(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -239,7 +239,7 @@ namespace
         _dst.create(src.size(), CV_8UC2);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgra_to_bgr555(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgra_to_bgr555(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgra_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -252,7 +252,7 @@ namespace
         _dst.create(src.size(), CV_8UC2);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgra_to_bgr565(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgra_to_bgr565(src, dst, StreamAccessor::getStream(stream));
     }
 
     void rgba_to_bgr555(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -265,7 +265,7 @@ namespace
         _dst.create(src.size(), CV_8UC2);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::rgba_to_bgr555(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::rgba_to_bgr555(src, dst, StreamAccessor::getStream(stream));
     }
 
     void rgba_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -278,7 +278,7 @@ namespace
         _dst.create(src.size(), CV_8UC2);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::rgba_to_bgr565(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::rgba_to_bgr565(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgr555_to_rgb(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -291,7 +291,7 @@ namespace
         _dst.create(src.size(), CV_8UC3);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgr555_to_rgb(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgr555_to_rgb(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgr565_to_rgb(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -304,7 +304,7 @@ namespace
         _dst.create(src.size(), CV_8UC3);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgr565_to_rgb(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgr565_to_rgb(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgr555_to_bgr(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -317,7 +317,7 @@ namespace
         _dst.create(src.size(), CV_8UC3);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgr555_to_bgr(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgr555_to_bgr(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgr565_to_bgr(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -330,7 +330,7 @@ namespace
         _dst.create(src.size(), CV_8UC3);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgr565_to_bgr(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgr565_to_bgr(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgr555_to_rgba(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -343,7 +343,7 @@ namespace
         _dst.create(src.size(), CV_8UC4);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgr555_to_rgba(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgr555_to_rgba(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgr565_to_rgba(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -356,7 +356,7 @@ namespace
         _dst.create(src.size(), CV_8UC4);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgr565_to_rgba(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgr565_to_rgba(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgr555_to_bgra(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -369,7 +369,7 @@ namespace
         _dst.create(src.size(), CV_8UC4);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgr555_to_bgra(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgr555_to_bgra(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgr565_to_bgra(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -382,12 +382,12 @@ namespace
         _dst.create(src.size(), CV_8UC4);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgr565_to_bgra(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgr565_to_bgra(src, dst, StreamAccessor::getStream(stream));
     }
 
     void gray_to_bgr(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[] = {gray_to_bgr_8u, 0, gray_to_bgr_16u, 0, 0, gray_to_bgr_32f};
 
         GpuMat src = _src.getGpuMat();
@@ -403,7 +403,7 @@ namespace
 
     void gray_to_bgra(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[] = {gray_to_bgra_8u, 0, gray_to_bgra_16u, 0, 0, gray_to_bgra_32f};
 
         GpuMat src = _src.getGpuMat();
@@ -427,7 +427,7 @@ namespace
         _dst.create(src.size(), CV_8UC2);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::gray_to_bgr555(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::gray_to_bgr555(src, dst, StreamAccessor::getStream(stream));
     }
 
     void gray_to_bgr565(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -440,7 +440,7 @@ namespace
         _dst.create(src.size(), CV_8UC2);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::gray_to_bgr565(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::gray_to_bgr565(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgr555_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -453,7 +453,7 @@ namespace
         _dst.create(src.size(), CV_8UC1);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgr555_to_gray(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgr555_to_gray(src, dst, StreamAccessor::getStream(stream));
     }
 
     void bgr565_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream)
@@ -466,12 +466,12 @@ namespace
         _dst.create(src.size(), CV_8UC1);
         GpuMat dst = _dst.getGpuMat();
 
-        cv::gpu::cudev::bgr565_to_gray(src, dst, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::bgr565_to_gray(src, dst, StreamAccessor::getStream(stream));
     }
 
     void rgb_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[] = {rgb_to_gray_8u, 0, rgb_to_gray_16u, 0, 0, rgb_to_gray_32f};
 
         GpuMat src = _src.getGpuMat();
@@ -487,7 +487,7 @@ namespace
 
     void bgr_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[] = {bgr_to_gray_8u, 0, bgr_to_gray_16u, 0, 0, bgr_to_gray_32f};
 
         GpuMat src = _src.getGpuMat();
@@ -503,7 +503,7 @@ namespace
 
     void rgba_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[] = {rgba_to_gray_8u, 0, rgba_to_gray_16u, 0, 0, rgba_to_gray_32f};
 
         GpuMat src = _src.getGpuMat();
@@ -519,7 +519,7 @@ namespace
 
     void bgra_to_gray(InputArray _src, OutputArray _dst, int, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[] = {bgra_to_gray_8u, 0, bgra_to_gray_16u, 0, 0, bgra_to_gray_32f};
 
         GpuMat src = _src.getGpuMat();
@@ -535,7 +535,7 @@ namespace
 
     void rgb_to_yuv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -564,7 +564,7 @@ namespace
 
     void bgr_to_yuv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -593,7 +593,7 @@ namespace
 
     void yuv_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -622,7 +622,7 @@ namespace
 
     void yuv_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -651,7 +651,7 @@ namespace
 
     void rgb_to_YCrCb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -680,7 +680,7 @@ namespace
 
     void bgr_to_YCrCb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -709,7 +709,7 @@ namespace
 
     void YCrCb_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -738,7 +738,7 @@ namespace
 
     void YCrCb_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -767,7 +767,7 @@ namespace
 
     void rgb_to_xyz(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -796,7 +796,7 @@ namespace
 
     void bgr_to_xyz(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -825,7 +825,7 @@ namespace
 
     void xyz_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -854,7 +854,7 @@ namespace
 
     void xyz_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -883,7 +883,7 @@ namespace
 
     void rgb_to_hsv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -912,7 +912,7 @@ namespace
 
     void bgr_to_hsv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -941,7 +941,7 @@ namespace
 
     void hsv_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -970,7 +970,7 @@ namespace
 
     void hsv_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -999,7 +999,7 @@ namespace
 
     void rgb_to_hls(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -1028,7 +1028,7 @@ namespace
 
     void bgr_to_hls(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -1057,7 +1057,7 @@ namespace
 
     void hls_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -1086,7 +1086,7 @@ namespace
 
     void hls_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -1115,7 +1115,7 @@ namespace
 
     void rgb_to_hsv_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -1144,7 +1144,7 @@ namespace
 
     void bgr_to_hsv_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -1173,7 +1173,7 @@ namespace
 
     void hsv_to_rgb_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -1202,7 +1202,7 @@ namespace
 
     void hsv_to_bgr_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -1231,7 +1231,7 @@ namespace
 
     void rgb_to_hls_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -1260,7 +1260,7 @@ namespace
 
     void bgr_to_hls_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -1289,7 +1289,7 @@ namespace
 
     void hls_to_rgb_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -1318,7 +1318,7 @@ namespace
 
     void hls_to_bgr_full(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][6] =
         {
             {
@@ -1347,7 +1347,7 @@ namespace
 
     void bgr_to_lab(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1376,7 +1376,7 @@ namespace
 
     void rgb_to_lab(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1405,7 +1405,7 @@ namespace
 
     void lbgr_to_lab(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1434,7 +1434,7 @@ namespace
 
     void lrgb_to_lab(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1463,7 +1463,7 @@ namespace
 
     void lab_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1492,7 +1492,7 @@ namespace
 
     void lab_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1521,7 +1521,7 @@ namespace
 
     void lab_to_lbgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1550,7 +1550,7 @@ namespace
 
     void lab_to_lrgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1579,7 +1579,7 @@ namespace
 
     void bgr_to_luv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1608,7 +1608,7 @@ namespace
 
     void rgb_to_luv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1637,7 +1637,7 @@ namespace
 
     void lbgr_to_luv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1666,7 +1666,7 @@ namespace
 
     void lrgb_to_luv(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1695,7 +1695,7 @@ namespace
 
     void luv_to_bgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1724,7 +1724,7 @@ namespace
 
     void luv_to_rgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1753,7 +1753,7 @@ namespace
 
     void luv_to_lbgr(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1782,7 +1782,7 @@ namespace
 
     void luv_to_lrgb(InputArray _src, OutputArray _dst, int dcn, Stream& stream)
     {
-        using namespace cv::gpu::cudev;
+        using namespace cv::cuda::cudev;
         static const gpu_func_t funcs[2][2][2] =
         {
             {
@@ -1922,7 +1922,7 @@ namespace
 ////////////////////////////////////////////////////////////////////////
 // cvtColor
 
-void cv::gpu::cvtColor(InputArray src, OutputArray dst, int code, int dcn, Stream& stream)
+void cv::cuda::cvtColor(InputArray src, OutputArray dst, int code, int dcn, Stream& stream)
 {
     typedef void (*func_t)(InputArray src, OutputArray dst, int dcn, Stream& stream);
     static const func_t funcs[] =
@@ -2108,7 +2108,7 @@ void cv::gpu::cvtColor(InputArray src, OutputArray dst, int code, int dcn, Strea
 ////////////////////////////////////////////////////////////////////////
 // demosaicing
 
-void cv::gpu::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, Stream& stream)
+void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, Stream& stream)
 {
     switch (code)
     {
@@ -2145,9 +2145,9 @@ void cv::gpu::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn,
                                         code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1);
 
         if (dcn == 3)
-            cv::gpu::cudev::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
+            cv::cuda::cudev::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
         else
-            cv::gpu::cudev::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
+            cv::cuda::cudev::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
 
         break;
     }
@@ -2172,7 +2172,7 @@ void cv::gpu::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn,
         const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1,
                                         code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1);
 
-        cv::gpu::cudev::MHCdemosaic<1>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
+        cv::cuda::cudev::MHCdemosaic<1>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream));
 
         break;
     }
@@ -2185,7 +2185,7 @@ void cv::gpu::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn,
 ////////////////////////////////////////////////////////////////////////
 // swapChannels
 
-void cv::gpu::swapChannels(InputOutputArray _image, const int dstOrder[4], Stream& _stream)
+void cv::cuda::swapChannels(InputOutputArray _image, const int dstOrder[4], Stream& _stream)
 {
     GpuMat image = _image.getGpuMat();
 
@@ -2207,7 +2207,7 @@ void cv::gpu::swapChannels(InputOutputArray _image, const int dstOrder[4], Strea
 ////////////////////////////////////////////////////////////////////////
 // gammaCorrection
 
-void cv::gpu::gammaCorrection(InputArray _src, OutputArray _dst, bool forward, Stream& stream)
+void cv::cuda::gammaCorrection(InputArray _src, OutputArray _dst, bool forward, Stream& stream)
 {
 #if (CUDA_VERSION < 5000)
     (void) _src;
@@ -2284,7 +2284,7 @@ namespace
     };
 }
 
-void cv::gpu::alphaComp(InputArray _img1, InputArray _img2, OutputArray _dst, int alpha_op, Stream& stream)
+void cv::cuda::alphaComp(InputArray _img1, InputArray _img2, OutputArray _dst, int alpha_op, Stream& stream)
 {
     static const NppiAlphaOp npp_alpha_ops[] = {
         NPPI_OP_ALPHA_OVER,
diff --git a/modules/gpuimgproc/src/corners.cpp b/modules/gpuimgproc/src/corners.cpp
index 5df5063274..e82d611a97 100644
--- a/modules/gpuimgproc/src/corners.cpp
+++ b/modules/gpuimgproc/src/corners.cpp
@@ -43,16 +43,16 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_GPUFILTERS)
 
-Ptr<gpu::CornernessCriteria> cv::gpu::createHarrisCorner(int, int, int, double, int) { throw_no_cuda(); return Ptr<gpu::CornernessCriteria>(); }
-Ptr<gpu::CornernessCriteria> cv::gpu::createMinEigenValCorner(int, int, int, int) { throw_no_cuda(); return Ptr<gpu::CornernessCriteria>(); }
+Ptr<cuda::CornernessCriteria> cv::cuda::createHarrisCorner(int, int, int, double, int) { throw_no_cuda(); return Ptr<cuda::CornernessCriteria>(); }
+Ptr<cuda::CornernessCriteria> cv::cuda::createMinEigenValCorner(int, int, int, int) { throw_no_cuda(); return Ptr<cuda::CornernessCriteria>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -77,7 +77,7 @@ namespace
         GpuMat Dx_, Dy_;
 
     private:
-        Ptr<gpu::Filter> filterDx_, filterDy_;
+        Ptr<cuda::Filter> filterDx_, filterDy_;
     };
 
     CornerBase::CornerBase(int srcType, int blockSize, int ksize, int borderType) :
@@ -102,13 +102,13 @@ namespace
 
         if (ksize_ > 0)
         {
-            filterDx_ = gpu::createSobelFilter(srcType, CV_32F, 1, 0, ksize_, scale, borderType_);
-            filterDy_ = gpu::createSobelFilter(srcType, CV_32F, 0, 1, ksize_, scale, borderType_);
+            filterDx_ = cuda::createSobelFilter(srcType, CV_32F, 1, 0, ksize_, scale, borderType_);
+            filterDy_ = cuda::createSobelFilter(srcType, CV_32F, 0, 1, ksize_, scale, borderType_);
         }
         else
         {
-            filterDx_ = gpu::createScharrFilter(srcType, CV_32F, 1, 0, scale, borderType_);
-            filterDy_ = gpu::createScharrFilter(srcType, CV_32F, 0, 1, scale, borderType_);
+            filterDx_ = cuda::createScharrFilter(srcType, CV_32F, 1, 0, scale, borderType_);
+            filterDy_ = cuda::createScharrFilter(srcType, CV_32F, 0, 1, scale, borderType_);
         }
     }
 
@@ -135,7 +135,7 @@ namespace
 
     void Harris::compute(InputArray _src, OutputArray _dst, Stream& stream)
     {
-        using namespace cv::gpu::cudev::imgproc;
+        using namespace cv::cuda::cudev::imgproc;
 
         GpuMat src = _src.getGpuMat();
 
@@ -163,7 +163,7 @@ namespace
 
     void MinEigenVal::compute(InputArray _src, OutputArray _dst, Stream& stream)
     {
-        using namespace cv::gpu::cudev::imgproc;
+        using namespace cv::cuda::cudev::imgproc;
 
         GpuMat src = _src.getGpuMat();
 
@@ -176,12 +176,12 @@ namespace
     }
 }
 
-Ptr<gpu::CornernessCriteria> cv::gpu::createHarrisCorner(int srcType, int blockSize, int ksize, double k, int borderType)
+Ptr<cuda::CornernessCriteria> cv::cuda::createHarrisCorner(int srcType, int blockSize, int ksize, double k, int borderType)
 {
     return new Harris(srcType, blockSize, ksize, k, borderType);
 }
 
-Ptr<gpu::CornernessCriteria> cv::gpu::createMinEigenValCorner(int srcType, int blockSize, int ksize, int borderType)
+Ptr<cuda::CornernessCriteria> cv::cuda::createMinEigenValCorner(int srcType, int blockSize, int ksize, int borderType)
 {
     return new MinEigenVal(srcType, blockSize, ksize, borderType);
 }
diff --git a/modules/gpuimgproc/src/cuda/bilateral_filter.cu b/modules/gpuimgproc/src/cuda/bilateral_filter.cu
index 3192f649b7..2f228388f9 100644
--- a/modules/gpuimgproc/src/cuda/bilateral_filter.cu
+++ b/modules/gpuimgproc/src/cuda/bilateral_filter.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"
 
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 typedef unsigned char uchar;
 typedef unsigned short ushort;
@@ -55,7 +55,7 @@ typedef unsigned short ushort;
 //////////////////////////////////////////////////////////////////////////////////
 /// Bilateral filtering
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -163,7 +163,7 @@ namespace cv { namespace gpu { namespace cudev
 
 
 #define OCV_INSTANTIATE_BILATERAL_FILTER(T) \
-    template void cv::gpu::cudev::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t);
+    template void cv::cuda::cudev::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t);
 
 OCV_INSTANTIATE_BILATERAL_FILTER(uchar)
 //OCV_INSTANTIATE_BILATERAL_FILTER(uchar2)
diff --git a/modules/gpuimgproc/src/cuda/blend.cu b/modules/gpuimgproc/src/cuda/blend.cu
index be8c0b2f35..7cca1755d6 100644
--- a/modules/gpuimgproc/src/cuda/blend.cu
+++ b/modules/gpuimgproc/src/cuda/blend.cu
@@ -44,7 +44,7 @@
 
 #include "opencv2/core/cuda/common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace blend
     {
@@ -115,7 +115,7 @@ namespace cv { namespace gpu { namespace cudev
                 cudaSafeCall(cudaDeviceSynchronize());
         }
     } // namespace blend
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuimgproc/src/cuda/build_point_list.cu b/modules/gpuimgproc/src/cuda/build_point_list.cu
index c5f2b23f6f..3a2e06cdc0 100644
--- a/modules/gpuimgproc/src/cuda/build_point_list.cu
+++ b/modules/gpuimgproc/src/cuda/build_point_list.cu
@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/emulation.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace hough
     {
diff --git a/modules/gpuimgproc/src/cuda/canny.cu b/modules/gpuimgproc/src/cuda/canny.cu
index 271fffbc7d..6398bd3330 100644
--- a/modules/gpuimgproc/src/cuda/canny.cu
+++ b/modules/gpuimgproc/src/cuda/canny.cu
@@ -50,8 +50,8 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "opencv2/core/cuda/utility.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace canny
 {
@@ -77,7 +77,7 @@ namespace canny
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <> struct TransformFunctorTraits<canny::L1> : DefaultTransformFunctorTraits<canny::L1>
     {
@@ -475,7 +475,7 @@ namespace canny
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <> struct TransformFunctorTraits<canny::GetEdges> : DefaultTransformFunctorTraits<canny::GetEdges>
     {
diff --git a/modules/gpuimgproc/src/cuda/clahe.cu b/modules/gpuimgproc/src/cuda/clahe.cu
index 7c6645749b..2c37c34b63 100644
--- a/modules/gpuimgproc/src/cuda/clahe.cu
+++ b/modules/gpuimgproc/src/cuda/clahe.cu
@@ -49,8 +49,8 @@
 #include "opencv2/core/cuda/reduce.hpp"
 #include "opencv2/core/cuda/saturate_cast.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace clahe
 {
diff --git a/modules/gpuimgproc/src/cuda/color.cu b/modules/gpuimgproc/src/cuda/color.cu
index 1a5d4865ed..e59741b78f 100644
--- a/modules/gpuimgproc/src/cuda/color.cu
+++ b/modules/gpuimgproc/src/cuda/color.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/color.hpp"
 #include "cvt_color_internal.h"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
     {
@@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace cudev
         traits::functor_type functor = traits::create_functor(); \
         typedef typename traits::functor_type::argument_type src_t; \
         typedef typename traits::functor_type::result_type   dst_t; \
-        cv::gpu::cudev::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream); \
+        cv::cuda::cudev::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream); \
     }
 
 #define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
@@ -456,6 +456,6 @@ namespace cv { namespace gpu { namespace cudev
     #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL
     #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F
     #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuimgproc/src/cuda/corners.cu b/modules/gpuimgproc/src/cuda/corners.cu
index aa65ac8f8b..847df92b08 100644
--- a/modules/gpuimgproc/src/cuda/corners.cu
+++ b/modules/gpuimgproc/src/cuda/corners.cu
@@ -52,7 +52,7 @@
 
 #ifdef HAVE_OPENCV_GPUFILTERS
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
diff --git a/modules/gpuimgproc/src/cuda/debayer.cu b/modules/gpuimgproc/src/cuda/debayer.cu
index 46a1c14ef4..8acdda423b 100644
--- a/modules/gpuimgproc/src/cuda/debayer.cu
+++ b/modules/gpuimgproc/src/cuda/debayer.cu
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/color.hpp"
 #include "opencv2/core/cuda/saturate_cast.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T> struct Bayer2BGR;
 
diff --git a/modules/gpuimgproc/src/cuda/generalized_hough.cu b/modules/gpuimgproc/src/cuda/generalized_hough.cu
index fdf691ff4a..17eeff6604 100644
--- a/modules/gpuimgproc/src/cuda/generalized_hough.cu
+++ b/modules/gpuimgproc/src/cuda/generalized_hough.cu
@@ -54,7 +54,7 @@
 
 #ifdef HAVE_OPENCV_GPUARITHM
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace ght
     {
diff --git a/modules/gpuimgproc/src/cuda/gftt.cu b/modules/gpuimgproc/src/cuda/gftt.cu
index b4af9e5dbc..f6fb0d207a 100644
--- a/modules/gpuimgproc/src/cuda/gftt.cu
+++ b/modules/gpuimgproc/src/cuda/gftt.cu
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/utility.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace gfft
     {
diff --git a/modules/gpuimgproc/src/cuda/hist.cu b/modules/gpuimgproc/src/cuda/hist.cu
index 51931d7ce5..ca1490b5bf 100644
--- a/modules/gpuimgproc/src/cuda/hist.cu
+++ b/modules/gpuimgproc/src/cuda/hist.cu
@@ -47,8 +47,8 @@
 #include "opencv2/core/cuda/emulation.hpp"
 #include "opencv2/core/cuda/transform.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace hist
 {
@@ -207,7 +207,7 @@ namespace hist
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <> struct TransformFunctorTraits<hist::EqualizeHist> : DefaultTransformFunctorTraits<hist::EqualizeHist>
     {
diff --git a/modules/gpuimgproc/src/cuda/hough_circles.cu b/modules/gpuimgproc/src/cuda/hough_circles.cu
index 6757e430b6..52d3c3d5bc 100644
--- a/modules/gpuimgproc/src/cuda/hough_circles.cu
+++ b/modules/gpuimgproc/src/cuda/hough_circles.cu
@@ -50,7 +50,7 @@
 
 #ifdef HAVE_OPENCV_GPUFILTERS
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace hough_circles
     {
diff --git a/modules/gpuimgproc/src/cuda/hough_lines.cu b/modules/gpuimgproc/src/cuda/hough_lines.cu
index 0cee0a43d2..9bbd1f8103 100644
--- a/modules/gpuimgproc/src/cuda/hough_lines.cu
+++ b/modules/gpuimgproc/src/cuda/hough_lines.cu
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/emulation.hpp"
 #include "opencv2/core/cuda/dynamic_smem.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace hough_lines
     {
diff --git a/modules/gpuimgproc/src/cuda/hough_segments.cu b/modules/gpuimgproc/src/cuda/hough_segments.cu
index e420449fae..fbb4d254c7 100644
--- a/modules/gpuimgproc/src/cuda/hough_segments.cu
+++ b/modules/gpuimgproc/src/cuda/hough_segments.cu
@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/vec_math.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace hough_segments
     {
diff --git a/modules/gpuimgproc/src/cuda/match_template.cu b/modules/gpuimgproc/src/cuda/match_template.cu
index 6670639290..7a6a5c5270 100644
--- a/modules/gpuimgproc/src/cuda/match_template.cu
+++ b/modules/gpuimgproc/src/cuda/match_template.cu
@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/vec_math.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace match_template
     {
@@ -910,7 +910,7 @@ namespace cv { namespace gpu { namespace cudev
                 cudaSafeCall( cudaDeviceSynchronize() );
         }
     } //namespace match_template
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuimgproc/src/cuda/mean_shift.cu b/modules/gpuimgproc/src/cuda/mean_shift.cu
index aa82f295e4..62603506c7 100644
--- a/modules/gpuimgproc/src/cuda/mean_shift.cu
+++ b/modules/gpuimgproc/src/cuda/mean_shift.cu
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
diff --git a/modules/gpuimgproc/src/cvt_color_internal.h b/modules/gpuimgproc/src/cvt_color_internal.h
index 010d832a25..1cd7b1c6a9 100644
--- a/modules/gpuimgproc/src/cvt_color_internal.h
+++ b/modules/gpuimgproc/src/cvt_color_internal.h
@@ -43,7 +43,7 @@
 #ifndef __cvt_color_internal_h__
 #define __cvt_color_internal_h__
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
 #define OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name) \
     void name(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
diff --git a/modules/gpuimgproc/src/generalized_hough.cpp b/modules/gpuimgproc/src/generalized_hough.cpp
index 6adfcb7a26..ffaf275295 100644
--- a/modules/gpuimgproc/src/generalized_hough.cpp
+++ b/modules/gpuimgproc/src/generalized_hough.cpp
@@ -43,17 +43,17 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_GPUARITHM)
 
-Ptr<GeneralizedHoughBallard> cv::gpu::createGeneralizedHoughBallard() { throw_no_cuda(); return Ptr<GeneralizedHoughBallard>(); }
+Ptr<GeneralizedHoughBallard> cv::cuda::createGeneralizedHoughBallard() { throw_no_cuda(); return Ptr<GeneralizedHoughBallard>(); }
 
-Ptr<GeneralizedHoughGuil> cv::gpu::createGeneralizedHoughGuil() { throw_no_cuda(); return Ptr<GeneralizedHoughGuil>(); }
+Ptr<GeneralizedHoughGuil> cv::cuda::createGeneralizedHoughGuil() { throw_no_cuda(); return Ptr<GeneralizedHoughGuil>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace ght
     {
@@ -148,9 +148,9 @@ namespace
         void convertTo(OutputArray positions, OutputArray votes);
 
 #ifdef HAVE_OPENCV_GPUFILTERS
-        Ptr<gpu::CannyEdgeDetector> canny_;
-        Ptr<gpu::Filter> filterDx_;
-        Ptr<gpu::Filter> filterDy_;
+        Ptr<cuda::CannyEdgeDetector> canny_;
+        Ptr<cuda::Filter> filterDx_;
+        Ptr<cuda::Filter> filterDy_;
 #endif
 
         std::vector<float4> oldPosBuf_;
@@ -170,9 +170,9 @@ namespace
         maxBufferSize_ = 10000;
 
 #ifdef HAVE_OPENCV_GPUFILTERS
-        canny_ = gpu::createCannyEdgeDetector(cannyLowThresh_, cannyHighThresh_);
-        filterDx_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 1, 0);
-        filterDy_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
+        canny_ = cuda::createCannyEdgeDetector(cannyLowThresh_, cannyHighThresh_);
+        filterDx_ = cuda::createSobelFilter(CV_8UC1, CV_32S, 1, 0);
+        filterDy_ = cuda::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
 #endif
     }
 
@@ -298,7 +298,7 @@ namespace
 
     void GeneralizedHoughBase::buildEdgePointList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy)
     {
-        using namespace cv::gpu::cudev::ght;
+        using namespace cv::cuda::cudev::ght;
 
         typedef int (*func_t)(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
         static const func_t funcs[] =
@@ -493,7 +493,7 @@ namespace
 
     void GeneralizedHoughBallardImpl::processTempl()
     {
-        using namespace cv::gpu::cudev::ght;
+        using namespace cv::cuda::cudev::ght;
 
         CV_Assert( levels_ > 0 );
 
@@ -507,7 +507,7 @@ namespace
         {
             buildRTable_gpu(edgePointList_.ptr<unsigned int>(0), edgePointList_.ptr<float>(1), edgePointList_.cols,
                             r_table_, r_sizes_.ptr<int>(), make_short2(templCenter_.x, templCenter_.y), levels_);
-            gpu::min(r_sizes_, maxBufferSize_, r_sizes_);
+            cuda::min(r_sizes_, maxBufferSize_, r_sizes_);
         }
     }
 
@@ -519,7 +519,7 @@ namespace
 
     void GeneralizedHoughBallardImpl::calcHist()
     {
-        using namespace cv::gpu::cudev::ght;
+        using namespace cv::cuda::cudev::ght;
 
         CV_Assert( levels_ > 0 && r_table_.rows == (levels_ + 1) && r_sizes_.cols == (levels_ + 1) );
         CV_Assert( dp_ > 0.0);
@@ -542,7 +542,7 @@ namespace
 
     void GeneralizedHoughBallardImpl::findPosInHist()
     {
-        using namespace cv::gpu::cudev::ght;
+        using namespace cv::cuda::cudev::ght;
 
         CV_Assert( votesThreshold_ > 0 );
 
@@ -552,7 +552,7 @@ namespace
     }
 }
 
-Ptr<GeneralizedHoughBallard> cv::gpu::createGeneralizedHoughBallard()
+Ptr<GeneralizedHoughBallard> cv::cuda::createGeneralizedHoughBallard()
 {
     return new GeneralizedHoughBallardImpl;
 }
@@ -728,7 +728,7 @@ namespace
 
     void GeneralizedHoughGuilImpl::processTempl()
     {
-        using namespace cv::gpu::cudev::ght;
+        using namespace cv::cuda::cudev::ght;
 
         buildFeatureList(templEdges_, templDx_, templDy_, templFeatures_,
             Guil_Full_setTemplFeatures, Guil_Full_buildTemplFeatureList_gpu,
@@ -741,7 +741,7 @@ namespace
 
     void GeneralizedHoughGuilImpl::processImage()
     {
-        using namespace cv::gpu::cudev::ght;
+        using namespace cv::cuda::cudev::ght;
 
         CV_Assert( levels_ > 0 );
         CV_Assert( templFeatures_.sizes.cols == levels_ + 1 );
@@ -837,7 +837,7 @@ namespace
 
     void GeneralizedHoughGuilImpl::calcOrientation()
     {
-        using namespace cv::gpu::cudev::ght;
+        using namespace cv::cuda::cudev::ght;
 
         const double iAngleStep = 1.0 / angleStep_;
         const int angleRange = cvCeil((maxAngle_ - minAngle_) * iAngleStep);
@@ -861,7 +861,7 @@ namespace
 
     void GeneralizedHoughGuilImpl::calcScale(double angle)
     {
-        using namespace cv::gpu::cudev::ght;
+        using namespace cv::cuda::cudev::ght;
 
         const double iScaleStep = 1.0 / scaleStep_;
         const int scaleRange = cvCeil((maxScale_ - minScale_) * iScaleStep);
@@ -886,7 +886,7 @@ namespace
 
     void GeneralizedHoughGuilImpl::calcPosition(double angle, int angleVotes, double scale, int scaleVotes)
     {
-        using namespace cv::gpu::cudev::ght;
+        using namespace cv::cuda::cudev::ght;
 
         hist_.setTo(Scalar::all(0));
         Guil_Full_calcPHist_gpu(templFeatures_.sizes.ptr<int>(), imageFeatures_.sizes.ptr<int>(0), hist_,
@@ -898,7 +898,7 @@ namespace
     }
 }
 
-Ptr<GeneralizedHoughGuil> cv::gpu::createGeneralizedHoughGuil()
+Ptr<GeneralizedHoughGuil> cv::cuda::createGeneralizedHoughGuil()
 {
     return new GeneralizedHoughGuilImpl;
 }
diff --git a/modules/gpuimgproc/src/gftt.cpp b/modules/gpuimgproc/src/gftt.cpp
index ff197d84c2..350e1f4359 100644
--- a/modules/gpuimgproc/src/gftt.cpp
+++ b/modules/gpuimgproc/src/gftt.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_GPUARITHM)
 
-Ptr<gpu::CornersDetector> cv::gpu::createGoodFeaturesToTrackDetector(int, int, double, double, int, bool, double) { throw_no_cuda(); return Ptr<gpu::CornersDetector>(); }
+Ptr<cuda::CornersDetector> cv::cuda::createGoodFeaturesToTrackDetector(int, int, double, double, int, bool, double) { throw_no_cuda(); return Ptr<cuda::CornersDetector>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace gfft
     {
@@ -75,7 +75,7 @@ namespace
         double qualityLevel_;
         double minDistance_;
 
-        Ptr<gpu::CornernessCriteria> cornerCriteria_;
+        Ptr<cuda::CornernessCriteria> cornerCriteria_;
 
         GpuMat Dx_;
         GpuMat Dy_;
@@ -92,13 +92,13 @@ namespace
         CV_Assert( qualityLevel_ > 0 && minDistance_ >= 0 && maxCorners_ >= 0 );
 
         cornerCriteria_ = useHarrisDetector ?
-                    gpu::createHarrisCorner(srcType, blockSize, 3, harrisK) :
-                    gpu::createMinEigenValCorner(srcType, blockSize, 3);
+                    cuda::createHarrisCorner(srcType, blockSize, 3, harrisK) :
+                    cuda::createMinEigenValCorner(srcType, blockSize, 3);
     }
 
     void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask)
     {
-        using namespace cv::gpu::cudev::gfft;
+        using namespace cv::cuda::cudev::gfft;
 
         GpuMat image = _image.getGpuMat();
         GpuMat mask = _mask.getGpuMat();
@@ -109,7 +109,7 @@ namespace
         cornerCriteria_->compute(image, eig_);
 
         double maxVal = 0;
-        gpu::minMax(eig_, 0, &maxVal, noArray(), minMaxbuf_);
+        cuda::minMax(eig_, 0, &maxVal, noArray(), minMaxbuf_);
 
         ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
 
@@ -206,7 +206,7 @@ namespace
     }
 }
 
-Ptr<gpu::CornersDetector> cv::gpu::createGoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
+Ptr<cuda::CornersDetector> cv::cuda::createGoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
                                                                      int blockSize, bool useHarrisDetector, double harrisK)
 {
     return new GoodFeaturesToTrackDetector(srcType, maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, harrisK);
diff --git a/modules/gpuimgproc/src/histogram.cpp b/modules/gpuimgproc/src/histogram.cpp
index 1baa892055..92df20baf1 100644
--- a/modules/gpuimgproc/src/histogram.cpp
+++ b/modules/gpuimgproc/src/histogram.cpp
@@ -43,23 +43,23 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::equalizeHist(InputArray, OutputArray, InputOutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::equalizeHist(InputArray, OutputArray, InputOutputArray, Stream&) { throw_no_cuda(); }
 
-cv::Ptr<cv::gpu::CLAHE> cv::gpu::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::gpu::CLAHE>(); }
+cv::Ptr<cv::cuda::CLAHE> cv::cuda::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::cuda::CLAHE>(); }
 
-void cv::gpu::evenLevels(OutputArray, int, int, int) { throw_no_cuda(); }
+void cv::cuda::evenLevels(OutputArray, int, int, int) { throw_no_cuda(); }
 
-void cv::gpu::histEven(InputArray, OutputArray, InputOutputArray, int, int, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::histEven(InputArray, GpuMat*, InputOutputArray, int*, int*, int*, Stream&) { throw_no_cuda(); }
+void cv::cuda::histEven(InputArray, OutputArray, InputOutputArray, int, int, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::histEven(InputArray, GpuMat*, InputOutputArray, int*, int*, int*, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::histRange(InputArray, OutputArray, InputArray, InputOutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::histRange(InputArray, GpuMat*, const GpuMat*, InputOutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::histRange(InputArray, OutputArray, InputArray, InputOutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::histRange(InputArray, GpuMat*, const GpuMat*, InputOutputArray, Stream&) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -71,7 +71,7 @@ namespace hist
     void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream);
 }
 
-void cv::gpu::calcHist(InputArray _src, OutputArray _hist, Stream& stream)
+void cv::cuda::calcHist(InputArray _src, OutputArray _hist, Stream& stream)
 {
     GpuMat src = _src.getGpuMat();
 
@@ -93,7 +93,7 @@ namespace hist
     void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream);
 }
 
-void cv::gpu::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray _buf, Stream& _stream)
+void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray _buf, Stream& _stream)
 {
     GpuMat src = _src.getGpuMat();
 
@@ -114,7 +114,7 @@ void cv::gpu::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray _
     GpuMat lut(1, 256, CV_32SC1, buf.data + 256 * sizeof(int));
     GpuMat intBuf(1, intBufSize, CV_8UC1, buf.data + 2 * 256 * sizeof(int));
 
-    gpu::calcHist(src, hist, _stream);
+    cuda::calcHist(src, hist, _stream);
 
     cudaStream_t stream = StreamAccessor::getStream(_stream);
     NppStreamHandler h(stream);
@@ -135,7 +135,7 @@ namespace clahe
 
 namespace
 {
-    class CLAHE_Impl : public cv::gpu::CLAHE
+    class CLAHE_Impl : public cv::cuda::CLAHE
     {
     public:
         CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
@@ -205,7 +205,7 @@ namespace
 #ifndef HAVE_OPENCV_GPUARITHM
             throw_no_cuda();
 #else
-            cv::gpu::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar(), s);
+            cv::cuda::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar(), s);
 #endif
 
             tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
@@ -255,7 +255,7 @@ namespace
     }
 }
 
-cv::Ptr<cv::gpu::CLAHE> cv::gpu::createCLAHE(double clipLimit, cv::Size tileGridSize)
+cv::Ptr<cv::cuda::CLAHE> cv::cuda::createCLAHE(double clipLimit, cv::Size tileGridSize)
 {
     return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height);
 }
@@ -460,7 +460,7 @@ namespace
     };
 }
 
-void cv::gpu::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int upperLevel)
+void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int upperLevel)
 {
     const int kind = _levels.kind();
 
@@ -493,7 +493,7 @@ namespace
     }
 }
 
-void cv::gpu::histEven(InputArray _src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)
+void cv::cuda::histEven(InputArray _src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)
 {
     typedef void (*hist_t)(const GpuMat& src, OutputArray hist, InputOutputArray buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream);
     static const hist_t hist_callers[] =
@@ -517,7 +517,7 @@ void cv::gpu::histEven(InputArray _src, OutputArray hist, InputOutputArray buf,
     hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
+void cv::cuda::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
 {
     typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], InputOutputArray buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream);
     static const hist_t hist_callers[] =
@@ -535,7 +535,7 @@ void cv::gpu::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, in
     hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::histRange(InputArray _src, OutputArray hist, InputArray _levels, InputOutputArray buf, Stream& stream)
+void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels, InputOutputArray buf, Stream& stream)
 {
     typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, InputOutputArray buf, cudaStream_t stream);
     static const hist_t hist_callers[] =
@@ -556,7 +556,7 @@ void cv::gpu::histRange(InputArray _src, OutputArray hist, InputArray _levels, I
     hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream)
+void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream)
 {
     typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, cudaStream_t stream);
     static const hist_t hist_callers[] =
diff --git a/modules/gpuimgproc/src/hough_circles.cpp b/modules/gpuimgproc/src/hough_circles.cpp
index fa583c09c7..85e0f9b070 100644
--- a/modules/gpuimgproc/src/hough_circles.cpp
+++ b/modules/gpuimgproc/src/hough_circles.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_GPUFILTERS)
 
-Ptr<gpu::HoughCirclesDetector> cv::gpu::createHoughCirclesDetector(float, float, int, int, int, int, int) { throw_no_cuda(); return Ptr<HoughCirclesDetector>(); }
+Ptr<cuda::HoughCirclesDetector> cv::cuda::createHoughCirclesDetector(float, float, int, int, int, int, int) { throw_no_cuda(); return Ptr<HoughCirclesDetector>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace hough
     {
@@ -135,9 +135,9 @@ namespace
         GpuMat accum_;
         GpuMat list_;
         GpuMat result_;
-        Ptr<gpu::Filter> filterDx_;
-        Ptr<gpu::Filter> filterDy_;
-        Ptr<gpu::CannyEdgeDetector> canny_;
+        Ptr<cuda::Filter> filterDx_;
+        Ptr<cuda::Filter> filterDy_;
+        Ptr<cuda::CannyEdgeDetector> canny_;
     };
 
     HoughCirclesDetectorImpl::HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold,
@@ -145,16 +145,16 @@ namespace
         dp_(dp), minDist_(minDist), cannyThreshold_(cannyThreshold), votesThreshold_(votesThreshold),
         minRadius_(minRadius), maxRadius_(maxRadius), maxCircles_(maxCircles)
     {
-        canny_ = gpu::createCannyEdgeDetector(std::max(cannyThreshold_ / 2, 1), cannyThreshold_);
+        canny_ = cuda::createCannyEdgeDetector(std::max(cannyThreshold_ / 2, 1), cannyThreshold_);
 
-        filterDx_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 1, 0);
-        filterDy_ = gpu::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
+        filterDx_ = cuda::createSobelFilter(CV_8UC1, CV_32S, 1, 0);
+        filterDy_ = cuda::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
     }
 
     void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles)
     {
-        using namespace cv::gpu::cudev::hough;
-        using namespace cv::gpu::cudev::hough_circles;
+        using namespace cv::cuda::cudev::hough;
+        using namespace cv::cuda::cudev::hough_circles;
 
         GpuMat src = _src.getGpuMat();
 
@@ -289,7 +289,7 @@ namespace
     }
 }
 
-Ptr<HoughCirclesDetector> cv::gpu::createHoughCirclesDetector(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
+Ptr<HoughCirclesDetector> cv::cuda::createHoughCirclesDetector(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
 {
     return new HoughCirclesDetectorImpl(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles);
 }
diff --git a/modules/gpuimgproc/src/hough_lines.cpp b/modules/gpuimgproc/src/hough_lines.cpp
index e0dec305d9..562d624c42 100644
--- a/modules/gpuimgproc/src/hough_lines.cpp
+++ b/modules/gpuimgproc/src/hough_lines.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-Ptr<gpu::HoughLinesDetector> cv::gpu::createHoughLinesDetector(float, float, int, bool, int) { throw_no_cuda(); return Ptr<HoughLinesDetector>(); }
+Ptr<cuda::HoughLinesDetector> cv::cuda::createHoughLinesDetector(float, float, int, bool, int) { throw_no_cuda(); return Ptr<HoughLinesDetector>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace hough
     {
@@ -127,8 +127,8 @@ namespace
 
     void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines)
     {
-        using namespace cv::gpu::cudev::hough;
-        using namespace cv::gpu::cudev::hough_lines;
+        using namespace cv::cuda::cudev::hough;
+        using namespace cv::cuda::cudev::hough_lines;
 
         GpuMat src = _src.getGpuMat();
 
@@ -194,7 +194,7 @@ namespace
     }
 }
 
-Ptr<HoughLinesDetector> cv::gpu::createHoughLinesDetector(float rho, float theta, int threshold, bool doSort, int maxLines)
+Ptr<HoughLinesDetector> cv::cuda::createHoughLinesDetector(float rho, float theta, int threshold, bool doSort, int maxLines)
 {
     return new HoughLinesDetectorImpl(rho, theta, threshold, doSort, maxLines);
 }
diff --git a/modules/gpuimgproc/src/hough_segments.cpp b/modules/gpuimgproc/src/hough_segments.cpp
index 1f11be68b2..1ce65949eb 100644
--- a/modules/gpuimgproc/src/hough_segments.cpp
+++ b/modules/gpuimgproc/src/hough_segments.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-Ptr<gpu::HoughSegmentDetector> cv::gpu::createHoughSegmentDetector(float, float, int, int, int) { throw_no_cuda(); return Ptr<HoughSegmentDetector>(); }
+Ptr<cuda::HoughSegmentDetector> cv::cuda::createHoughSegmentDetector(float, float, int, int, int) { throw_no_cuda(); return Ptr<HoughSegmentDetector>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace hough
     {
@@ -130,9 +130,9 @@ namespace
 
     void HoughSegmentDetectorImpl::detect(InputArray _src, OutputArray lines)
     {
-        using namespace cv::gpu::cudev::hough;
-        using namespace cv::gpu::cudev::hough_lines;
-        using namespace cv::gpu::cudev::hough_segments;
+        using namespace cv::cuda::cudev::hough;
+        using namespace cv::cuda::cudev::hough_lines;
+        using namespace cv::cuda::cudev::hough_segments;
 
         GpuMat src = _src.getGpuMat();
 
@@ -175,7 +175,7 @@ namespace
     }
 }
 
-Ptr<HoughSegmentDetector> cv::gpu::createHoughSegmentDetector(float rho, float theta, int minLineLength, int maxLineGap, int maxLines)
+Ptr<HoughSegmentDetector> cv::cuda::createHoughSegmentDetector(float rho, float theta, int minLineLength, int maxLineGap, int maxLines)
 {
     return new HoughSegmentDetectorImpl(rho, theta, minLineLength, maxLineGap, maxLines);
 }
diff --git a/modules/gpuimgproc/src/match_template.cpp b/modules/gpuimgproc/src/match_template.cpp
index 2b5d5cb1ca..66820026a6 100644
--- a/modules/gpuimgproc/src/match_template.cpp
+++ b/modules/gpuimgproc/src/match_template.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_GPUARITHM) || defined (CUDA_DISABLER)
 
-Ptr<gpu::TemplateMatching> cv::gpu::createTemplateMatching(int, int, Size) { throw_no_cuda(); return Ptr<gpu::TemplateMatching>(); }
+Ptr<cuda::TemplateMatching> cv::cuda::createTemplateMatching(int, int, Size) { throw_no_cuda(); return Ptr<cuda::TemplateMatching>(); }
 
 #else
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace match_template
     {
@@ -171,18 +171,18 @@ namespace
         void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
 
     private:
-        Ptr<gpu::Convolution> conv_;
+        Ptr<cuda::Convolution> conv_;
         GpuMat result_;
     };
 
     Match_CCORR_32F::Match_CCORR_32F(Size user_block_size)
     {
-        conv_ = gpu::createConvolution(user_block_size);
+        conv_ = cuda::createConvolution(user_block_size);
     }
 
     void Match_CCORR_32F::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& _stream)
     {
-        using namespace cv::gpu::cudev::match_template;
+        using namespace cv::cuda::cudev::match_template;
 
         GpuMat image = _image.getGpuMat();
         GpuMat templ = _templ.getGpuMat();
@@ -232,7 +232,7 @@ namespace
 
     void Match_CCORR_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
     {
-        using namespace cv::gpu::cudev::match_template;
+        using namespace cv::cuda::cudev::match_template;
 
         GpuMat image = _image.getGpuMat();
         GpuMat templ = _templ.getGpuMat();
@@ -276,7 +276,7 @@ namespace
 
     void Match_CCORR_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
     {
-        using namespace cv::gpu::cudev::match_template;
+        using namespace cv::cuda::cudev::match_template;
 
         GpuMat image = _image.getGpuMat();
         GpuMat templ = _templ.getGpuMat();
@@ -288,9 +288,9 @@ namespace
         match_CCORR_.match(image, templ, _result, stream);
         GpuMat result = _result.getGpuMat();
 
-        gpu::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
+        cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
 
-        unsigned long long templ_sqsum = (unsigned long long) gpu::sqrSum(templ.reshape(1))[0];
+        unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0];
 
         normalize_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
     }
@@ -306,7 +306,7 @@ namespace
 
     void Match_SQDIFF_32F::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
     {
-        using namespace cv::gpu::cudev::match_template;
+        using namespace cv::cuda::cudev::match_template;
 
         GpuMat image = _image.getGpuMat();
         GpuMat templ = _templ.getGpuMat();
@@ -341,7 +341,7 @@ namespace
 
     void Match_SQDIFF_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
     {
-        using namespace cv::gpu::cudev::match_template;
+        using namespace cv::cuda::cudev::match_template;
 
         GpuMat image = _image.getGpuMat();
         GpuMat templ = _templ.getGpuMat();
@@ -359,9 +359,9 @@ namespace
             return;
         }
 
-        gpu::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
+        cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
 
-        unsigned long long templ_sqsum = (unsigned long long) gpu::sqrSum(templ.reshape(1))[0];
+        unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0];
 
         match_CCORR_.match(image, templ, _result, stream);
         GpuMat result = _result.getGpuMat();
@@ -389,7 +389,7 @@ namespace
 
     void Match_SQDIFF_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
     {
-        using namespace cv::gpu::cudev::match_template;
+        using namespace cv::cuda::cudev::match_template;
 
         GpuMat image = _image.getGpuMat();
         GpuMat templ = _templ.getGpuMat();
@@ -398,9 +398,9 @@ namespace
         CV_Assert( image.type() == templ.type() );
         CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
 
-        gpu::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
+        cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
 
-        unsigned long long templ_sqsum = (unsigned long long) gpu::sqrSum(templ.reshape(1))[0];
+        unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0];
 
         match_CCORR_.match(image, templ, _result, stream);
         GpuMat result = _result.getGpuMat();
@@ -429,7 +429,7 @@ namespace
 
     void Match_CCOEFF_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
     {
-        using namespace cv::gpu::cudev::match_template;
+        using namespace cv::cuda::cudev::match_template;
 
         GpuMat image = _image.getGpuMat();
         GpuMat templ = _templ.getGpuMat();
@@ -444,21 +444,21 @@ namespace
         if (image.channels() == 1)
         {
             image_sums_.resize(1);
-            gpu::integral(image, image_sums_[0], intBuffer_, stream);
+            cuda::integral(image, image_sums_[0], intBuffer_, stream);
 
-            unsigned int templ_sum = (unsigned int) gpu::sum(templ)[0];
+            unsigned int templ_sum = (unsigned int) cuda::sum(templ)[0];
 
             matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, image_sums_[0], templ_sum, result, StreamAccessor::getStream(stream));
         }
         else
         {
-            gpu::split(image, images_);
+            cuda::split(image, images_);
 
             image_sums_.resize(images_.size());
             for (int i = 0; i < image.channels(); ++i)
-                gpu::integral(images_[i], image_sums_[i], intBuffer_, stream);
+                cuda::integral(images_[i], image_sums_[i], intBuffer_, stream);
 
-            Scalar templ_sum = gpu::sum(templ);
+            Scalar templ_sum = cuda::sum(templ);
 
             switch (image.channels())
             {
@@ -509,7 +509,7 @@ namespace
 
     void Match_CCOEFF_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
     {
-        using namespace cv::gpu::cudev::match_template;
+        using namespace cv::cuda::cudev::match_template;
 
         GpuMat image = _image.getGpuMat();
         GpuMat templ = _templ.getGpuMat();
@@ -527,13 +527,13 @@ namespace
         if (image.channels() == 1)
         {
             image_sums_.resize(1);
-            gpu::integral(image, image_sums_[0], intBuffer_, stream);
+            cuda::integral(image, image_sums_[0], intBuffer_, stream);
 
             image_sqsums_.resize(1);
-            gpu::sqrIntegral(image, image_sqsums_[0], intBuffer_, stream);
+            cuda::sqrIntegral(image, image_sqsums_[0], intBuffer_, stream);
 
-            unsigned int templ_sum = (unsigned int) gpu::sum(templ)[0];
-            unsigned long long templ_sqsum = (unsigned long long) gpu::sqrSum(templ)[0];
+            unsigned int templ_sum = (unsigned int) cuda::sum(templ)[0];
+            unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ)[0];
 
             matchTemplatePrepared_CCOFF_NORMED_8U(
                     templ.cols, templ.rows, image_sums_[0], image_sqsums_[0],
@@ -541,18 +541,18 @@ namespace
         }
         else
         {
-            gpu::split(image, images_);
+            cuda::split(image, images_);
 
             image_sums_.resize(images_.size());
             image_sqsums_.resize(images_.size());
             for (int i = 0; i < image.channels(); ++i)
             {
-                gpu::integral(images_[i], image_sums_[i], intBuffer_, stream);
-                gpu::sqrIntegral(images_[i], image_sqsums_[i], intBuffer_, stream);
+                cuda::integral(images_[i], image_sums_[i], intBuffer_, stream);
+                cuda::sqrIntegral(images_[i], image_sqsums_[i], intBuffer_, stream);
             }
 
-            Scalar templ_sum = gpu::sum(templ);
-            Scalar templ_sqsum = gpu::sqrSum(templ);
+            Scalar templ_sum = cuda::sum(templ);
+            Scalar templ_sqsum = cuda::sqrSum(templ);
 
             switch (image.channels())
             {
@@ -596,7 +596,7 @@ namespace
     }
 }
 
-Ptr<gpu::TemplateMatching> cv::gpu::createTemplateMatching(int srcType, int method, Size user_block_size)
+Ptr<cuda::TemplateMatching> cv::cuda::createTemplateMatching(int srcType, int method, Size user_block_size)
 {
     const int sdepth = CV_MAT_DEPTH(srcType);
 
@@ -614,7 +614,7 @@ Ptr<gpu::TemplateMatching> cv::gpu::createTemplateMatching(int srcType, int meth
 
         default:
             CV_Error( Error::StsBadFlag, "Unsopported method" );
-            return Ptr<gpu::TemplateMatching>();
+            return Ptr<cuda::TemplateMatching>();
         }
     }
     else
@@ -641,7 +641,7 @@ Ptr<gpu::TemplateMatching> cv::gpu::createTemplateMatching(int srcType, int meth
 
         default:
             CV_Error( Error::StsBadFlag, "Unsopported method" );
-            return Ptr<gpu::TemplateMatching>();
+            return Ptr<cuda::TemplateMatching>();
         }
     }
 }
diff --git a/modules/gpuimgproc/src/mean_shift.cpp b/modules/gpuimgproc/src/mean_shift.cpp
index 26368ca5ac..cbc0584392 100644
--- a/modules/gpuimgproc/src/mean_shift.cpp
+++ b/modules/gpuimgproc/src/mean_shift.cpp
@@ -43,19 +43,19 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::meanShiftFiltering(InputArray, OutputArray, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
-void cv::gpu::meanShiftProc(InputArray, OutputArray, OutputArray, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
+void cv::cuda::meanShiftFiltering(InputArray, OutputArray, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
+void cv::cuda::meanShiftProc(InputArray, OutputArray, OutputArray, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
 
 #else /* !defined (HAVE_CUDA) */
 
 ////////////////////////////////////////////////////////////////////////
 // meanShiftFiltering
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -63,9 +63,9 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::meanShiftFiltering(InputArray _src, OutputArray _dst, int sp, int sr, TermCriteria criteria, Stream& stream)
+void cv::cuda::meanShiftFiltering(InputArray _src, OutputArray _dst, int sp, int sr, TermCriteria criteria, Stream& stream)
 {
-    using namespace ::cv::gpu::cudev::imgproc;
+    using namespace ::cv::cuda::cudev::imgproc;
 
     GpuMat src = _src.getGpuMat();
 
@@ -90,7 +90,7 @@ void cv::gpu::meanShiftFiltering(InputArray _src, OutputArray _dst, int sp, int
 ////////////////////////////////////////////////////////////////////////
 // meanShiftProc_GPU
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -98,9 +98,9 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::meanShiftProc(InputArray _src, OutputArray _dstr, OutputArray _dstsp, int sp, int sr, TermCriteria criteria, Stream& stream)
+void cv::cuda::meanShiftProc(InputArray _src, OutputArray _dstr, OutputArray _dstsp, int sp, int sr, TermCriteria criteria, Stream& stream)
 {
-    using namespace ::cv::gpu::cudev::imgproc;
+    using namespace ::cv::cuda::cudev::imgproc;
 
     GpuMat src = _src.getGpuMat();
 
diff --git a/modules/gpuimgproc/src/mssegmentation.cpp b/modules/gpuimgproc/src/mssegmentation.cpp
index ec1c5feb4b..ad5819800e 100644
--- a/modules/gpuimgproc/src/mssegmentation.cpp
+++ b/modules/gpuimgproc/src/mssegmentation.cpp
@@ -43,7 +43,7 @@
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::meanShiftSegmentation(InputArray, OutputArray, int, int, int, TermCriteria) { throw_no_cuda(); }
+void cv::cuda::meanShiftSegmentation(InputArray, OutputArray, int, int, int, TermCriteria) { throw_no_cuda(); }
 
 #else
 
@@ -222,7 +222,7 @@ inline int dist2(const cv::Vec2s& lhs, const cv::Vec2s& rhs)
 } // anonymous namespace
 
 
-void cv::gpu::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp, int sr, int minsize, TermCriteria criteria)
+void cv::cuda::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp, int sr, int minsize, TermCriteria criteria)
 {
     GpuMat src = _src.getGpuMat();
 
@@ -235,7 +235,7 @@ void cv::gpu::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp, i
 
     // Perform mean shift procedure and obtain region and spatial maps
     GpuMat d_rmap, d_spmap;
-    gpu::meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria);
+    cuda::meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria);
     Mat rmap(d_rmap);
     Mat spmap(d_spmap);
 
diff --git a/modules/gpuimgproc/test/test_bilateral_filter.cpp b/modules/gpuimgproc/test/test_bilateral_filter.cpp
index 23dd3b85d6..7f697a75b2 100644
--- a/modules/gpuimgproc/test/test_bilateral_filter.cpp
+++ b/modules/gpuimgproc/test/test_bilateral_filter.cpp
@@ -49,9 +49,9 @@ using namespace cvtest;
 ////////////////////////////////////////////////////////
 // BilateralFilter
 
-PARAM_TEST_CASE(BilateralFilter, cv::gpu::DeviceInfo, cv::Size, MatType)
+PARAM_TEST_CASE(BilateralFilter, cv::cuda::DeviceInfo, cv::Size, MatType)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     int kernel_size;
@@ -68,7 +68,7 @@ PARAM_TEST_CASE(BilateralFilter, cv::gpu::DeviceInfo, cv::Size, MatType)
         sigma_color = 10.f;
         sigma_spatial = 3.5f;
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -77,9 +77,9 @@ GPU_TEST_P(BilateralFilter, Accuracy)
     cv::Mat src = randomMat(size, type);
 
     src.convertTo(src, type);
-    cv::gpu::GpuMat dst;
+    cv::cuda::GpuMat dst;
 
-    cv::gpu::bilateralFilter(loadMat(src), dst, kernel_size, sigma_color, sigma_spatial);
+    cv::cuda::bilateralFilter(loadMat(src), dst, kernel_size, sigma_color, sigma_spatial);
 
     cv::Mat dst_gold;
     cv::bilateralFilter(src, dst_gold, kernel_size, sigma_color, sigma_spatial);
diff --git a/modules/gpuimgproc/test/test_blend.cpp b/modules/gpuimgproc/test/test_blend.cpp
index 87359b500c..abbbcf193b 100644
--- a/modules/gpuimgproc/test/test_blend.cpp
+++ b/modules/gpuimgproc/test/test_blend.cpp
@@ -76,9 +76,9 @@ namespace
     }
 }
 
-PARAM_TEST_CASE(Blend, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
+PARAM_TEST_CASE(Blend, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     bool useRoi;
@@ -90,7 +90,7 @@ PARAM_TEST_CASE(Blend, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
         type = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -103,8 +103,8 @@ GPU_TEST_P(Blend, Accuracy)
     cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
     cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
 
-    cv::gpu::GpuMat result;
-    cv::gpu::blendLinear(loadMat(img1, useRoi), loadMat(img2, useRoi), loadMat(weights1, useRoi), loadMat(weights2, useRoi), result);
+    cv::cuda::GpuMat result;
+    cv::cuda::blendLinear(loadMat(img1, useRoi), loadMat(img2, useRoi), loadMat(weights1, useRoi), loadMat(weights2, useRoi), result);
 
     cv::Mat result_gold;
     if (depth == CV_8U)
diff --git a/modules/gpuimgproc/test/test_canny.cpp b/modules/gpuimgproc/test/test_canny.cpp
index 3d9d350165..1d426527ac 100644
--- a/modules/gpuimgproc/test/test_canny.cpp
+++ b/modules/gpuimgproc/test/test_canny.cpp
@@ -55,9 +55,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(L2gradient, bool)
 }
 
-PARAM_TEST_CASE(Canny, cv::gpu::DeviceInfo, AppertureSize, L2gradient, UseRoi)
+PARAM_TEST_CASE(Canny, cv::cuda::DeviceInfo, AppertureSize, L2gradient, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     int apperture_size;
     bool useL2gradient;
     bool useRoi;
@@ -69,7 +69,7 @@ PARAM_TEST_CASE(Canny, cv::gpu::DeviceInfo, AppertureSize, L2gradient, UseRoi)
         useL2gradient = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -81,9 +81,9 @@ GPU_TEST_P(Canny, Accuracy)
     double low_thresh = 50.0;
     double high_thresh = 100.0;
 
-    cv::Ptr<cv::gpu::CannyEdgeDetector> canny = cv::gpu::createCannyEdgeDetector(low_thresh, high_thresh, apperture_size, useL2gradient);
+    cv::Ptr<cv::cuda::CannyEdgeDetector> canny = cv::cuda::createCannyEdgeDetector(low_thresh, high_thresh, apperture_size, useL2gradient);
 
-    cv::gpu::GpuMat edges;
+    cv::cuda::GpuMat edges;
     canny->detect(loadMat(img, useRoi), edges);
 
     cv::Mat edges_gold;
diff --git a/modules/gpuimgproc/test/test_color.cpp b/modules/gpuimgproc/test/test_color.cpp
index 4bd53c9194..85a72c798d 100644
--- a/modules/gpuimgproc/test/test_color.cpp
+++ b/modules/gpuimgproc/test/test_color.cpp
@@ -49,9 +49,9 @@ using namespace cvtest;
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // cvtColor
 
-PARAM_TEST_CASE(CvtColor, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(CvtColor, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -65,7 +65,7 @@ PARAM_TEST_CASE(CvtColor, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         img = randomMat(size, CV_MAKE_TYPE(depth, 3), 0.0, depth == CV_32F ? 1.0 : 255.0);
     }
@@ -75,8 +75,8 @@ GPU_TEST_P(CvtColor, BGR2RGB)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2RGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2RGB);
@@ -88,8 +88,8 @@ GPU_TEST_P(CvtColor, BGR2RGBA)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2RGBA);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2RGBA);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2RGBA);
@@ -101,8 +101,8 @@ GPU_TEST_P(CvtColor, BGR2BGRA)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2BGRA);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2BGRA);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2BGRA);
@@ -115,8 +115,8 @@ GPU_TEST_P(CvtColor, BGRA2RGB)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGRA2RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGRA2RGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2RGB);
@@ -129,8 +129,8 @@ GPU_TEST_P(CvtColor, BGRA2BGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGRA2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGRA2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2BGR);
@@ -143,8 +143,8 @@ GPU_TEST_P(CvtColor, BGRA2RGBA)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGRA2RGBA);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGRA2RGBA);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2RGBA);
@@ -156,8 +156,8 @@ GPU_TEST_P(CvtColor, BGR2GRAY)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2GRAY);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2GRAY);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2GRAY);
@@ -169,8 +169,8 @@ GPU_TEST_P(CvtColor, RGB2GRAY)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2GRAY);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2GRAY);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGB2GRAY);
@@ -183,8 +183,8 @@ GPU_TEST_P(CvtColor, GRAY2BGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2GRAY);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_GRAY2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_GRAY2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_GRAY2BGR);
@@ -197,8 +197,8 @@ GPU_TEST_P(CvtColor, GRAY2BGRA)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2GRAY);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_GRAY2BGRA, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_GRAY2BGRA, 4);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_GRAY2BGRA, 4);
@@ -211,8 +211,8 @@ GPU_TEST_P(CvtColor, BGRA2GRAY)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGRA2GRAY);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGRA2GRAY);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2GRAY);
@@ -225,8 +225,8 @@ GPU_TEST_P(CvtColor, RGBA2GRAY)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGBA2GRAY);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGBA2GRAY);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGBA2GRAY);
@@ -241,8 +241,8 @@ GPU_TEST_P(CvtColor, BGR2BGR565)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2BGR565);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2BGR565);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2BGR565);
@@ -257,8 +257,8 @@ GPU_TEST_P(CvtColor, RGB2BGR565)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2BGR565);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2BGR565);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGB2BGR565);
@@ -274,8 +274,8 @@ GPU_TEST_P(CvtColor, BGR5652BGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGR565);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5652BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5652BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR5652BGR);
@@ -291,8 +291,8 @@ GPU_TEST_P(CvtColor, BGR5652RGB)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGR565);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5652RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5652RGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR5652RGB);
@@ -308,8 +308,8 @@ GPU_TEST_P(CvtColor, BGRA2BGR565)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGRA2BGR565);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGRA2BGR565);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2BGR565);
@@ -325,8 +325,8 @@ GPU_TEST_P(CvtColor, RGBA2BGR565)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGBA2BGR565);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGBA2BGR565);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGBA2BGR565);
@@ -342,8 +342,8 @@ GPU_TEST_P(CvtColor, BGR5652BGRA)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGR565);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5652BGRA, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5652BGRA, 4);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR5652BGRA, 4);
@@ -359,8 +359,8 @@ GPU_TEST_P(CvtColor, BGR5652RGBA)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGR565);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5652RGBA, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5652RGBA, 4);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR5652RGBA, 4);
@@ -376,8 +376,8 @@ GPU_TEST_P(CvtColor, GRAY2BGR565)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2GRAY);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_GRAY2BGR565);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_GRAY2BGR565);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_GRAY2BGR565);
@@ -393,8 +393,8 @@ GPU_TEST_P(CvtColor, BGR5652GRAY)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGR565);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5652GRAY);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5652GRAY);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR5652GRAY);
@@ -409,8 +409,8 @@ GPU_TEST_P(CvtColor, BGR2BGR555)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2BGR555);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2BGR555);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2BGR555);
@@ -425,8 +425,8 @@ GPU_TEST_P(CvtColor, RGB2BGR555)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2BGR555);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2BGR555);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGB2BGR555);
@@ -442,8 +442,8 @@ GPU_TEST_P(CvtColor, BGR5552BGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGR555);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5552BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5552BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR5552BGR);
@@ -459,8 +459,8 @@ GPU_TEST_P(CvtColor, BGR5552RGB)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGR555);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5552RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5552RGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR5552RGB);
@@ -476,8 +476,8 @@ GPU_TEST_P(CvtColor, BGRA2BGR555)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGRA2BGR555);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGRA2BGR555);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2BGR555);
@@ -493,8 +493,8 @@ GPU_TEST_P(CvtColor, RGBA2BGR555)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGBA2BGR555);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGBA2BGR555);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGBA2BGR555);
@@ -510,8 +510,8 @@ GPU_TEST_P(CvtColor, BGR5552BGRA)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGR555);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5552BGRA, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5552BGRA, 4);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR5552BGRA, 4);
@@ -527,8 +527,8 @@ GPU_TEST_P(CvtColor, BGR5552RGBA)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGR555);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5552RGBA, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5552RGBA, 4);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR5552RGBA, 4);
@@ -544,8 +544,8 @@ GPU_TEST_P(CvtColor, GRAY2BGR555)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2GRAY);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_GRAY2BGR555);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_GRAY2BGR555);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_GRAY2BGR555);
@@ -561,8 +561,8 @@ GPU_TEST_P(CvtColor, BGR5552GRAY)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGR555);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5552GRAY);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR5552GRAY);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR5552GRAY);
@@ -574,8 +574,8 @@ GPU_TEST_P(CvtColor, BGR2XYZ)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2XYZ);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2XYZ);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2XYZ);
@@ -587,8 +587,8 @@ GPU_TEST_P(CvtColor, RGB2XYZ)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2XYZ);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2XYZ);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGB2XYZ);
@@ -600,8 +600,8 @@ GPU_TEST_P(CvtColor, BGR2XYZ4)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2XYZ, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2XYZ, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -622,8 +622,8 @@ GPU_TEST_P(CvtColor, BGRA2XYZ4)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2BGRA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2XYZ, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2XYZ, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -644,8 +644,8 @@ GPU_TEST_P(CvtColor, XYZ2BGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2XYZ);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_XYZ2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_XYZ2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_XYZ2BGR);
@@ -658,8 +658,8 @@ GPU_TEST_P(CvtColor, XYZ2RGB)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2XYZ);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_XYZ2RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_XYZ2RGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_XYZ2RGB);
@@ -680,8 +680,8 @@ GPU_TEST_P(CvtColor, XYZ42BGR)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_XYZ2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_XYZ2BGR);
 
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
@@ -699,8 +699,8 @@ GPU_TEST_P(CvtColor, XYZ42BGRA)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_XYZ2BGR, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_XYZ2BGR, 4);
 
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
@@ -709,8 +709,8 @@ GPU_TEST_P(CvtColor, BGR2YCrCb)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2YCrCb);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2YCrCb);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2YCrCb);
@@ -722,8 +722,8 @@ GPU_TEST_P(CvtColor, RGB2YCrCb)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2YCrCb);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2YCrCb);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGB2YCrCb);
@@ -735,8 +735,8 @@ GPU_TEST_P(CvtColor, BGR2YCrCb4)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2YCrCb, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2YCrCb, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -757,8 +757,8 @@ GPU_TEST_P(CvtColor, RGBA2YCrCb4)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2YCrCb, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2YCrCb, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -779,8 +779,8 @@ GPU_TEST_P(CvtColor, YCrCb2BGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2YCrCb);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YCrCb2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YCrCb2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_YCrCb2BGR);
@@ -793,8 +793,8 @@ GPU_TEST_P(CvtColor, YCrCb2RGB)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2YCrCb);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YCrCb2RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YCrCb2RGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_YCrCb2RGB);
@@ -815,8 +815,8 @@ GPU_TEST_P(CvtColor, YCrCb42RGB)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YCrCb2RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YCrCb2RGB);
 
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
@@ -834,8 +834,8 @@ GPU_TEST_P(CvtColor, YCrCb42RGBA)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YCrCb2RGB, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YCrCb2RGB, 4);
 
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
@@ -847,8 +847,8 @@ GPU_TEST_P(CvtColor, BGR2HSV)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2HSV);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2HSV);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2HSV);
@@ -863,8 +863,8 @@ GPU_TEST_P(CvtColor, RGB2HSV)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HSV);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HSV);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HSV);
@@ -879,8 +879,8 @@ GPU_TEST_P(CvtColor, RGB2HSV4)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HSV, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HSV, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -904,8 +904,8 @@ GPU_TEST_P(CvtColor, RGBA2HSV4)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HSV, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HSV, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -928,8 +928,8 @@ GPU_TEST_P(CvtColor, BGR2HLS)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2HLS);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2HLS);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2HLS);
@@ -944,8 +944,8 @@ GPU_TEST_P(CvtColor, RGB2HLS)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HLS);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HLS);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HLS);
@@ -960,8 +960,8 @@ GPU_TEST_P(CvtColor, RGB2HLS4)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HLS, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HLS, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -985,8 +985,8 @@ GPU_TEST_P(CvtColor, RGBA2HLS4)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HLS, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HLS, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -1010,8 +1010,8 @@ GPU_TEST_P(CvtColor, HSV2BGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2HSV);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_HSV2BGR);
@@ -1027,8 +1027,8 @@ GPU_TEST_P(CvtColor, HSV2RGB)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2HSV);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2RGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_HSV2RGB);
@@ -1052,8 +1052,8 @@ GPU_TEST_P(CvtColor, HSV42BGR)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2BGR);
 
     EXPECT_MAT_NEAR(dst_gold, dst, depth == CV_32F ? 1e-2 : 1);
 }
@@ -1074,8 +1074,8 @@ GPU_TEST_P(CvtColor, HSV42BGRA)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2BGR, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2BGR, 4);
 
     EXPECT_MAT_NEAR(dst_gold, dst, depth == CV_32F ? 1e-2 : 1);
 }
@@ -1088,8 +1088,8 @@ GPU_TEST_P(CvtColor, HLS2BGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2HLS);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_HLS2BGR);
@@ -1105,8 +1105,8 @@ GPU_TEST_P(CvtColor, HLS2RGB)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2HLS);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2RGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_HLS2RGB);
@@ -1130,8 +1130,8 @@ GPU_TEST_P(CvtColor, HLS42RGB)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2RGB);
 
     EXPECT_MAT_NEAR(dst_gold, dst, depth == CV_32F ? 1e-2 : 1);
 }
@@ -1153,8 +1153,8 @@ GPU_TEST_P(CvtColor, HLS42RGBA)
     cv::merge(channels, 4, src);
 
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2RGB, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2RGB, 4);
 
     EXPECT_MAT_NEAR(dst_gold, dst, depth == CV_32F ? 1e-2 : 1);
 }
@@ -1166,8 +1166,8 @@ GPU_TEST_P(CvtColor, BGR2HSV_FULL)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2HSV_FULL);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2HSV_FULL);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2HSV_FULL);
@@ -1182,8 +1182,8 @@ GPU_TEST_P(CvtColor, RGB2HSV_FULL)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HSV_FULL);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HSV_FULL);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HSV_FULL);
@@ -1198,8 +1198,8 @@ GPU_TEST_P(CvtColor, RGB2HSV4_FULL)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HSV_FULL, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HSV_FULL, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -1223,8 +1223,8 @@ GPU_TEST_P(CvtColor, RGBA2HSV4_FULL)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HSV_FULL, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HSV_FULL, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -1247,8 +1247,8 @@ GPU_TEST_P(CvtColor, BGR2HLS_FULL)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2HLS_FULL);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2HLS_FULL);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2HLS_FULL);
@@ -1263,8 +1263,8 @@ GPU_TEST_P(CvtColor, RGB2HLS_FULL)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HLS_FULL);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HLS_FULL);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGB2HLS_FULL);
@@ -1279,8 +1279,8 @@ GPU_TEST_P(CvtColor, RGB2HLS4_FULL)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HLS_FULL, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HLS_FULL, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -1304,8 +1304,8 @@ GPU_TEST_P(CvtColor, RGBA2HLS4_FULL)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HLS_FULL, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2HLS_FULL, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -1329,8 +1329,8 @@ GPU_TEST_P(CvtColor, HSV2BGR_FULL)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2HSV_FULL);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2BGR_FULL);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2BGR_FULL);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_HSV2BGR_FULL);
@@ -1346,8 +1346,8 @@ GPU_TEST_P(CvtColor, HSV2RGB_FULL)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2HSV_FULL);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2RGB_FULL);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2RGB_FULL);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_HSV2RGB_FULL);
@@ -1371,8 +1371,8 @@ GPU_TEST_P(CvtColor, HSV42RGB_FULL)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2RGB_FULL);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2RGB_FULL);
 
     EXPECT_MAT_NEAR(dst_gold, dst, depth == CV_32F ? 1e-2 : 1);
 }
@@ -1393,8 +1393,8 @@ GPU_TEST_P(CvtColor, HSV42RGBA_FULL)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2RGB_FULL, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HSV2RGB_FULL, 4);
 
     EXPECT_MAT_NEAR(dst_gold, dst, depth == CV_32F ? 1e-2 : 1);
 }
@@ -1407,8 +1407,8 @@ GPU_TEST_P(CvtColor, HLS2BGR_FULL)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2HLS_FULL);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2BGR_FULL);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2BGR_FULL);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_HLS2BGR_FULL);
@@ -1424,8 +1424,8 @@ GPU_TEST_P(CvtColor, HLS2RGB_FULL)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2HLS_FULL);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2RGB_FULL);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2RGB_FULL);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_HLS2RGB_FULL);
@@ -1449,8 +1449,8 @@ GPU_TEST_P(CvtColor, HLS42RGB_FULL)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2RGB_FULL);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2RGB_FULL);
 
     EXPECT_MAT_NEAR(dst_gold, dst, depth == CV_32F ? 1e-2 : 1);
 }
@@ -1471,8 +1471,8 @@ GPU_TEST_P(CvtColor, HLS42RGBA_FULL)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2RGB_FULL, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_HLS2RGB_FULL, 4);
 
     EXPECT_MAT_NEAR(dst_gold, dst, depth == CV_32F ? 1e-2 : 1);
 }
@@ -1481,8 +1481,8 @@ GPU_TEST_P(CvtColor, BGR2YUV)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2YUV);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2YUV);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2YUV);
@@ -1494,8 +1494,8 @@ GPU_TEST_P(CvtColor, RGB2YUV)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2YUV);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2YUV);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGB2YUV);
@@ -1508,8 +1508,8 @@ GPU_TEST_P(CvtColor, YUV2BGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2YUV);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YUV2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YUV2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_YUV2BGR);
@@ -1530,8 +1530,8 @@ GPU_TEST_P(CvtColor, YUV42BGR)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YUV2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YUV2BGR);
 
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
@@ -1549,8 +1549,8 @@ GPU_TEST_P(CvtColor, YUV42BGRA)
     channels[3] = cv::Mat(src.size(), depth, cv::Scalar::all(0));
     cv::merge(channels, 4, src);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YUV2BGR, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YUV2BGR, 4);
 
     EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
 }
@@ -1560,8 +1560,8 @@ GPU_TEST_P(CvtColor, YUV2RGB)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_RGB2YUV);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YUV2RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_YUV2RGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_YUV2RGB);
@@ -1573,8 +1573,8 @@ GPU_TEST_P(CvtColor, BGR2YUV4)
 {
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2YUV, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2YUV, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -1595,8 +1595,8 @@ GPU_TEST_P(CvtColor, RGBA2YUV4)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2YUV, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2YUV, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -1619,8 +1619,8 @@ GPU_TEST_P(CvtColor, BGR2Lab)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2Lab);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2Lab);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2Lab);
@@ -1635,8 +1635,8 @@ GPU_TEST_P(CvtColor, RGB2Lab)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2Lab);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2Lab);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGB2Lab);
@@ -1652,8 +1652,8 @@ GPU_TEST_P(CvtColor, BGRA2Lab4)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2Lab, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2Lab, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -1676,8 +1676,8 @@ GPU_TEST_P(CvtColor, LBGR2Lab)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_LBGR2Lab);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_LBGR2Lab);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_LBGR2Lab);
@@ -1692,8 +1692,8 @@ GPU_TEST_P(CvtColor, LRGB2Lab)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_LRGB2Lab);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_LRGB2Lab);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_LRGB2Lab);
@@ -1709,8 +1709,8 @@ GPU_TEST_P(CvtColor, LBGRA2Lab4)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_LBGR2Lab, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_LBGR2Lab, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -1734,8 +1734,8 @@ GPU_TEST_P(CvtColor, Lab2BGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2Lab);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Lab2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Lab2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_Lab2BGR);
@@ -1751,8 +1751,8 @@ GPU_TEST_P(CvtColor, Lab2RGB)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2Lab);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Lab2RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Lab2RGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_Lab2RGB);
@@ -1768,8 +1768,8 @@ GPU_TEST_P(CvtColor, Lab2BGRA)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2Lab);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Lab2BGR, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Lab2BGR, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -1787,8 +1787,8 @@ GPU_TEST_P(CvtColor, Lab2LBGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2Lab);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Lab2LBGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Lab2LBGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_Lab2LBGR);
@@ -1804,8 +1804,8 @@ GPU_TEST_P(CvtColor, Lab2LRGB)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2Lab);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Lab2LRGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Lab2LRGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_Lab2LRGB);
@@ -1821,8 +1821,8 @@ GPU_TEST_P(CvtColor, Lab2LRGBA)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2Lab);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Lab2LRGB, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Lab2LRGB, 4);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_Lab2LRGB, 4);
@@ -1837,8 +1837,8 @@ GPU_TEST_P(CvtColor, BGR2Luv)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2Luv);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2Luv);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGR2Luv);
@@ -1853,8 +1853,8 @@ GPU_TEST_P(CvtColor, RGB2Luv)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2Luv);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGB2Luv);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGB2Luv);
@@ -1870,8 +1870,8 @@ GPU_TEST_P(CvtColor, BGRA2Luv4)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2Luv, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BGR2Luv, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -1894,8 +1894,8 @@ GPU_TEST_P(CvtColor, LBGR2Luv)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_LBGR2Luv);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_LBGR2Luv);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_LBGR2Luv);
@@ -1910,8 +1910,8 @@ GPU_TEST_P(CvtColor, LRGB2Luv)
 
     cv::Mat src = img;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_LRGB2Luv);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_LRGB2Luv);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_LRGB2Luv);
@@ -1927,8 +1927,8 @@ GPU_TEST_P(CvtColor, LBGRA2Luv4)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2RGBA);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_LBGR2Luv, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_LBGR2Luv, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -1952,8 +1952,8 @@ GPU_TEST_P(CvtColor, Luv2BGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2Luv);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Luv2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Luv2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_Luv2BGR);
@@ -1969,8 +1969,8 @@ GPU_TEST_P(CvtColor, Luv2RGB)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2Luv);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Luv2RGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Luv2RGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_Luv2RGB);
@@ -1986,8 +1986,8 @@ GPU_TEST_P(CvtColor, Luv2BGRA)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2Luv);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Luv2BGR, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Luv2BGR, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -2005,8 +2005,8 @@ GPU_TEST_P(CvtColor, Luv2LBGR)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2Luv);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Luv2LBGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Luv2LBGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_Luv2LBGR);
@@ -2022,8 +2022,8 @@ GPU_TEST_P(CvtColor, Luv2LRGB)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2Luv);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Luv2LRGB);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Luv2LRGB);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_Luv2LRGB);
@@ -2039,8 +2039,8 @@ GPU_TEST_P(CvtColor, Luv2LRGBA)
     cv::Mat src;
     cv::cvtColor(img, src, cv::COLOR_BGR2Luv);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Luv2LRGB, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_Luv2LRGB, 4);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_Luv2LRGB, 4);
@@ -2057,8 +2057,8 @@ GPU_TEST_P(CvtColor, RGBA2mRGBA)
 
     cv::Mat src = randomMat(size, CV_MAKE_TYPE(depth, 4));
 
-    cv::gpu::GpuMat dst = createMat(src.size(), src.type(), useRoi);
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGBA2mRGBA);
+    cv::cuda::GpuMat dst = createMat(src.size(), src.type(), useRoi);
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_RGBA2mRGBA);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_RGBA2mRGBA);
@@ -2075,8 +2075,8 @@ GPU_TEST_P(CvtColor, BayerBG2BGR)
 
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerBG2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerBG2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BayerBG2BGR);
@@ -2091,8 +2091,8 @@ GPU_TEST_P(CvtColor, BayerBG2BGR4)
 
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerBG2BGR, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerBG2BGR, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -2114,8 +2114,8 @@ GPU_TEST_P(CvtColor, BayerGB2BGR)
 
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGB2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGB2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BayerGB2BGR);
@@ -2130,8 +2130,8 @@ GPU_TEST_P(CvtColor, BayerGB2BGR4)
 
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGB2BGR, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGB2BGR, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -2152,8 +2152,8 @@ GPU_TEST_P(CvtColor, BayerRG2BGR)
 
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerRG2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerRG2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BayerRG2BGR);
@@ -2168,8 +2168,8 @@ GPU_TEST_P(CvtColor, BayerRG2BGR4)
 
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerRG2BGR, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerRG2BGR, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -2190,8 +2190,8 @@ GPU_TEST_P(CvtColor, BayerGR2BGR)
 
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGR2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGR2BGR);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BayerGR2BGR);
@@ -2206,8 +2206,8 @@ GPU_TEST_P(CvtColor, BayerGR2BGR4)
 
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGR2BGR, 4);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGR2BGR, 4);
 
     ASSERT_EQ(4, dst.channels());
 
@@ -2228,8 +2228,8 @@ GPU_TEST_P(CvtColor, BayerBG2Gray)
 
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerBG2GRAY);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerBG2GRAY);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BayerBG2GRAY);
@@ -2244,8 +2244,8 @@ GPU_TEST_P(CvtColor, BayerGB2Gray)
 
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGB2GRAY);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGB2GRAY);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BayerGB2GRAY);
@@ -2260,8 +2260,8 @@ GPU_TEST_P(CvtColor, BayerRG2Gray)
 
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerRG2GRAY);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerRG2GRAY);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BayerRG2GRAY);
@@ -2276,8 +2276,8 @@ GPU_TEST_P(CvtColor, BayerGR2Gray)
 
     cv::Mat src = randomMat(size, depth);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGR2GRAY);
+    cv::cuda::GpuMat dst;
+    cv::cuda::cvtColor(loadMat(src, useRoi), dst, cv::COLOR_BayerGR2GRAY);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BayerGR2GRAY);
@@ -2294,15 +2294,15 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CvtColor, testing::Combine(
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // Demosaicing
 
-struct Demosaicing : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct Demosaicing : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 
     static void mosaic(const cv::Mat_<cv::Vec3b>& src, cv::Mat_<uchar>& dst, cv::Point firstRed)
@@ -2361,8 +2361,8 @@ GPU_TEST_P(Demosaicing, BayerBG2BGR)
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(1, 1));
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::demosaicing(loadMat(src), dst, cv::COLOR_BayerBG2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::demosaicing(loadMat(src), dst, cv::COLOR_BayerBG2BGR);
 
     EXPECT_MAT_SIMILAR(img, dst, 2e-2);
 }
@@ -2374,8 +2374,8 @@ GPU_TEST_P(Demosaicing, BayerGB2BGR)
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(0, 1));
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::demosaicing(loadMat(src), dst, cv::COLOR_BayerGB2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::demosaicing(loadMat(src), dst, cv::COLOR_BayerGB2BGR);
 
     EXPECT_MAT_SIMILAR(img, dst, 2e-2);
 }
@@ -2387,8 +2387,8 @@ GPU_TEST_P(Demosaicing, BayerRG2BGR)
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(0, 0));
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::demosaicing(loadMat(src), dst, cv::COLOR_BayerRG2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::demosaicing(loadMat(src), dst, cv::COLOR_BayerRG2BGR);
 
     EXPECT_MAT_SIMILAR(img, dst, 2e-2);
 }
@@ -2400,8 +2400,8 @@ GPU_TEST_P(Demosaicing, BayerGR2BGR)
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(1, 0));
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::demosaicing(loadMat(src), dst, cv::COLOR_BayerGR2BGR);
+    cv::cuda::GpuMat dst;
+    cv::cuda::demosaicing(loadMat(src), dst, cv::COLOR_BayerGR2BGR);
 
     EXPECT_MAT_SIMILAR(img, dst, 2e-2);
 }
@@ -2413,8 +2413,8 @@ GPU_TEST_P(Demosaicing, BayerBG2BGR_MHT)
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(1, 1));
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::demosaicing(loadMat(src), dst, cv::gpu::COLOR_BayerBG2BGR_MHT);
+    cv::cuda::GpuMat dst;
+    cv::cuda::demosaicing(loadMat(src), dst, cv::cuda::COLOR_BayerBG2BGR_MHT);
 
     EXPECT_MAT_SIMILAR(img, dst, 5e-3);
 }
@@ -2426,8 +2426,8 @@ GPU_TEST_P(Demosaicing, BayerGB2BGR_MHT)
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(0, 1));
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::demosaicing(loadMat(src), dst, cv::gpu::COLOR_BayerGB2BGR_MHT);
+    cv::cuda::GpuMat dst;
+    cv::cuda::demosaicing(loadMat(src), dst, cv::cuda::COLOR_BayerGB2BGR_MHT);
 
     EXPECT_MAT_SIMILAR(img, dst, 5e-3);
 }
@@ -2439,8 +2439,8 @@ GPU_TEST_P(Demosaicing, BayerRG2BGR_MHT)
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(0, 0));
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::demosaicing(loadMat(src), dst, cv::gpu::COLOR_BayerRG2BGR_MHT);
+    cv::cuda::GpuMat dst;
+    cv::cuda::demosaicing(loadMat(src), dst, cv::cuda::COLOR_BayerRG2BGR_MHT);
 
     EXPECT_MAT_SIMILAR(img, dst, 5e-3);
 }
@@ -2452,8 +2452,8 @@ GPU_TEST_P(Demosaicing, BayerGR2BGR_MHT)
     cv::Mat_<uchar> src;
     mosaic(img, src, cv::Point(1, 0));
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::demosaicing(loadMat(src), dst, cv::gpu::COLOR_BayerGR2BGR_MHT);
+    cv::cuda::GpuMat dst;
+    cv::cuda::demosaicing(loadMat(src), dst, cv::cuda::COLOR_BayerGR2BGR_MHT);
 
     EXPECT_MAT_SIMILAR(img, dst, 5e-3);
 }
@@ -2463,9 +2463,9 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Demosaicing, ALL_DEVICES);
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // swapChannels
 
-PARAM_TEST_CASE(SwapChannels, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+PARAM_TEST_CASE(SwapChannels, cv::cuda::DeviceInfo, cv::Size, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     bool useRoi;
 
@@ -2475,7 +2475,7 @@ PARAM_TEST_CASE(SwapChannels, cv::gpu::DeviceInfo, cv::Size, UseRoi)
         size = GET_PARAM(1);
         useRoi = GET_PARAM(2);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -2484,10 +2484,10 @@ GPU_TEST_P(SwapChannels, Accuracy)
     cv::Mat src = readImageType("stereobm/aloe-L.png", CV_8UC4);
     ASSERT_FALSE(src.empty());
 
-    cv::gpu::GpuMat d_src = loadMat(src, useRoi);
+    cv::cuda::GpuMat d_src = loadMat(src, useRoi);
 
     const int dstOrder[] = {2, 1, 0, 3};
-    cv::gpu::swapChannels(d_src, dstOrder);
+    cv::cuda::swapChannels(d_src, dstOrder);
 
     cv::Mat dst_gold;
     cv::cvtColor(src, dst_gold, cv::COLOR_BGRA2RGBA);
diff --git a/modules/gpuimgproc/test/test_corners.cpp b/modules/gpuimgproc/test/test_corners.cpp
index 2625480565..8403d19380 100644
--- a/modules/gpuimgproc/test/test_corners.cpp
+++ b/modules/gpuimgproc/test/test_corners.cpp
@@ -55,9 +55,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(ApertureSize, int);
 }
 
-PARAM_TEST_CASE(CornerHarris, cv::gpu::DeviceInfo, MatType, BorderType, BlockSize, ApertureSize)
+PARAM_TEST_CASE(CornerHarris, cv::cuda::DeviceInfo, MatType, BorderType, BlockSize, ApertureSize)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     int type;
     int borderType;
     int blockSize;
@@ -71,7 +71,7 @@ PARAM_TEST_CASE(CornerHarris, cv::gpu::DeviceInfo, MatType, BorderType, BlockSiz
         blockSize = GET_PARAM(3);
         apertureSize = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -82,9 +82,9 @@ GPU_TEST_P(CornerHarris, Accuracy)
 
     double k = randomDouble(0.1, 0.9);
 
-    cv::Ptr<cv::gpu::CornernessCriteria> harris = cv::gpu::createHarrisCorner(src.type(), blockSize, apertureSize, k, borderType);
+    cv::Ptr<cv::cuda::CornernessCriteria> harris = cv::cuda::createHarrisCorner(src.type(), blockSize, apertureSize, k, borderType);
 
-    cv::gpu::GpuMat dst;
+    cv::cuda::GpuMat dst;
     harris->compute(loadMat(src), dst);
 
     cv::Mat dst_gold;
@@ -103,9 +103,9 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CornerHarris, testing::Combine(
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // cornerMinEigen
 
-PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, BorderType, BlockSize, ApertureSize)
+PARAM_TEST_CASE(CornerMinEigen, cv::cuda::DeviceInfo, MatType, BorderType, BlockSize, ApertureSize)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     int type;
     int borderType;
     int blockSize;
@@ -119,7 +119,7 @@ PARAM_TEST_CASE(CornerMinEigen, cv::gpu::DeviceInfo, MatType, BorderType, BlockS
         blockSize = GET_PARAM(3);
         apertureSize = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -128,9 +128,9 @@ GPU_TEST_P(CornerMinEigen, Accuracy)
     cv::Mat src = readImageType("stereobm/aloe-L.png", type);
     ASSERT_FALSE(src.empty());
 
-    cv::Ptr<cv::gpu::CornernessCriteria> minEigenVal = cv::gpu::createMinEigenValCorner(src.type(), blockSize, apertureSize, borderType);
+    cv::Ptr<cv::cuda::CornernessCriteria> minEigenVal = cv::cuda::createMinEigenValCorner(src.type(), blockSize, apertureSize, borderType);
 
-    cv::gpu::GpuMat dst;
+    cv::cuda::GpuMat dst;
     minEigenVal->compute(loadMat(src), dst);
 
     cv::Mat dst_gold;
diff --git a/modules/gpuimgproc/test/test_gftt.cpp b/modules/gpuimgproc/test/test_gftt.cpp
index 6ba6e0cfff..5daaed38df 100644
--- a/modules/gpuimgproc/test/test_gftt.cpp
+++ b/modules/gpuimgproc/test/test_gftt.cpp
@@ -54,9 +54,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(MinDistance, double)
 }
 
-PARAM_TEST_CASE(GoodFeaturesToTrack, cv::gpu::DeviceInfo, MinDistance)
+PARAM_TEST_CASE(GoodFeaturesToTrack, cv::cuda::DeviceInfo, MinDistance)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     double minDistance;
 
     virtual void SetUp()
@@ -64,7 +64,7 @@ PARAM_TEST_CASE(GoodFeaturesToTrack, cv::gpu::DeviceInfo, MinDistance)
         devInfo = GET_PARAM(0);
         minDistance = GET_PARAM(1);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -76,9 +76,9 @@ GPU_TEST_P(GoodFeaturesToTrack, Accuracy)
     int maxCorners = 1000;
     double qualityLevel = 0.01;
 
-    cv::Ptr<cv::gpu::CornersDetector> detector = cv::gpu::createGoodFeaturesToTrackDetector(image.type(), maxCorners, qualityLevel, minDistance);
+    cv::Ptr<cv::cuda::CornersDetector> detector = cv::cuda::createGoodFeaturesToTrackDetector(image.type(), maxCorners, qualityLevel, minDistance);
 
-    cv::gpu::GpuMat d_pts;
+    cv::cuda::GpuMat d_pts;
     detector->detect(loadMat(image), d_pts);
 
     ASSERT_FALSE(d_pts.empty());
@@ -114,10 +114,10 @@ GPU_TEST_P(GoodFeaturesToTrack, EmptyCorners)
     int maxCorners = 1000;
     double qualityLevel = 0.01;
 
-    cv::gpu::GpuMat src(100, 100, CV_8UC1, cv::Scalar::all(0));
-    cv::gpu::GpuMat corners(1, maxCorners, CV_32FC2);
+    cv::cuda::GpuMat src(100, 100, CV_8UC1, cv::Scalar::all(0));
+    cv::cuda::GpuMat corners(1, maxCorners, CV_32FC2);
 
-    cv::Ptr<cv::gpu::CornersDetector> detector = cv::gpu::createGoodFeaturesToTrackDetector(src.type(), maxCorners, qualityLevel, minDistance);
+    cv::Ptr<cv::cuda::CornersDetector> detector = cv::cuda::createGoodFeaturesToTrackDetector(src.type(), maxCorners, qualityLevel, minDistance);
 
     detector->detect(src, corners);
 
diff --git a/modules/gpuimgproc/test/test_histogram.cpp b/modules/gpuimgproc/test/test_histogram.cpp
index 556211729b..04bba71afc 100644
--- a/modules/gpuimgproc/test/test_histogram.cpp
+++ b/modules/gpuimgproc/test/test_histogram.cpp
@@ -49,9 +49,9 @@ using namespace cvtest;
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // HistEven
 
-PARAM_TEST_CASE(HistEven, cv::gpu::DeviceInfo, cv::Size)
+PARAM_TEST_CASE(HistEven, cv::cuda::DeviceInfo, cv::Size)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     cv::Size size;
 
@@ -60,7 +60,7 @@ PARAM_TEST_CASE(HistEven, cv::gpu::DeviceInfo, cv::Size)
         devInfo = GET_PARAM(0);
         size = GET_PARAM(1);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -71,8 +71,8 @@ GPU_TEST_P(HistEven, Accuracy)
     int hbins = 30;
     float hranges[] = {50.0f, 200.0f};
 
-    cv::gpu::GpuMat hist;
-    cv::gpu::histEven(loadMat(src), hist, hbins, (int) hranges[0], (int) hranges[1]);
+    cv::cuda::GpuMat hist;
+    cv::cuda::histEven(loadMat(src), hist, hbins, (int) hranges[0], (int) hranges[1]);
 
     cv::Mat hist_gold;
 
@@ -94,9 +94,9 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HistEven, testing::Combine(
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // CalcHist
 
-PARAM_TEST_CASE(CalcHist, cv::gpu::DeviceInfo, cv::Size)
+PARAM_TEST_CASE(CalcHist, cv::cuda::DeviceInfo, cv::Size)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     cv::Size size;
 
@@ -105,7 +105,7 @@ PARAM_TEST_CASE(CalcHist, cv::gpu::DeviceInfo, cv::Size)
         devInfo = GET_PARAM(0);
         size = GET_PARAM(1);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -113,8 +113,8 @@ GPU_TEST_P(CalcHist, Accuracy)
 {
     cv::Mat src = randomMat(size, CV_8UC1);
 
-    cv::gpu::GpuMat hist;
-    cv::gpu::calcHist(loadMat(src), hist);
+    cv::cuda::GpuMat hist;
+    cv::cuda::calcHist(loadMat(src), hist);
 
     cv::Mat hist_gold;
 
@@ -138,9 +138,9 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CalcHist, testing::Combine(
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // EqualizeHist
 
-PARAM_TEST_CASE(EqualizeHist, cv::gpu::DeviceInfo, cv::Size)
+PARAM_TEST_CASE(EqualizeHist, cv::cuda::DeviceInfo, cv::Size)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
 
     virtual void SetUp()
@@ -148,7 +148,7 @@ PARAM_TEST_CASE(EqualizeHist, cv::gpu::DeviceInfo, cv::Size)
         devInfo = GET_PARAM(0);
         size = GET_PARAM(1);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -156,8 +156,8 @@ GPU_TEST_P(EqualizeHist, Accuracy)
 {
     cv::Mat src = randomMat(size, CV_8UC1);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::equalizeHist(loadMat(src), dst);
+    cv::cuda::GpuMat dst;
+    cv::cuda::equalizeHist(loadMat(src), dst);
 
     cv::Mat dst_gold;
     cv::equalizeHist(src, dst_gold);
@@ -177,9 +177,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(ClipLimit, double)
 }
 
-PARAM_TEST_CASE(CLAHE, cv::gpu::DeviceInfo, cv::Size, ClipLimit)
+PARAM_TEST_CASE(CLAHE, cv::cuda::DeviceInfo, cv::Size, ClipLimit)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     double clipLimit;
 
@@ -189,7 +189,7 @@ PARAM_TEST_CASE(CLAHE, cv::gpu::DeviceInfo, cv::Size, ClipLimit)
         size = GET_PARAM(1);
         clipLimit = GET_PARAM(2);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -197,8 +197,8 @@ GPU_TEST_P(CLAHE, Accuracy)
 {
     cv::Mat src = randomMat(size, CV_8UC1);
 
-    cv::Ptr<cv::gpu::CLAHE> clahe = cv::gpu::createCLAHE(clipLimit);
-    cv::gpu::GpuMat dst;
+    cv::Ptr<cv::cuda::CLAHE> clahe = cv::cuda::createCLAHE(clipLimit);
+    cv::cuda::GpuMat dst;
     clahe->apply(loadMat(src), dst);
 
     cv::Ptr<cv::CLAHE> clahe_gold = cv::createCLAHE(clipLimit);
diff --git a/modules/gpuimgproc/test/test_hough.cpp b/modules/gpuimgproc/test/test_hough.cpp
index 969899d8b6..67bb39d6c8 100644
--- a/modules/gpuimgproc/test/test_hough.cpp
+++ b/modules/gpuimgproc/test/test_hough.cpp
@@ -49,7 +49,7 @@ using namespace cvtest;
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // HoughLines
 
-PARAM_TEST_CASE(HoughLines, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+PARAM_TEST_CASE(HoughLines, cv::cuda::DeviceInfo, cv::Size, UseRoi)
 {
     static void generateLines(cv::Mat& img)
     {
@@ -82,8 +82,8 @@ PARAM_TEST_CASE(HoughLines, cv::gpu::DeviceInfo, cv::Size, UseRoi)
 
 GPU_TEST_P(HoughLines, Accuracy)
 {
-    const cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::gpu::setDevice(devInfo.deviceID());
+    const cv::cuda::DeviceInfo devInfo = GET_PARAM(0);
+    cv::cuda::setDevice(devInfo.deviceID());
     const cv::Size size = GET_PARAM(1);
     const bool useRoi = GET_PARAM(2);
 
@@ -94,9 +94,9 @@ GPU_TEST_P(HoughLines, Accuracy)
     cv::Mat src(size, CV_8UC1);
     generateLines(src);
 
-    cv::Ptr<cv::gpu::HoughLinesDetector> hough = cv::gpu::createHoughLinesDetector(rho, theta, threshold);
+    cv::Ptr<cv::cuda::HoughLinesDetector> hough = cv::cuda::createHoughLinesDetector(rho, theta, threshold);
 
-    cv::gpu::GpuMat d_lines;
+    cv::cuda::GpuMat d_lines;
     hough->detect(loadMat(src, useRoi), d_lines);
 
     std::vector<cv::Vec2f> lines;
@@ -116,7 +116,7 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HoughLines, testing::Combine(
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // HoughCircles
 
-PARAM_TEST_CASE(HoughCircles, cv::gpu::DeviceInfo, cv::Size, UseRoi)
+PARAM_TEST_CASE(HoughCircles, cv::cuda::DeviceInfo, cv::Size, UseRoi)
 {
     static void drawCircles(cv::Mat& dst, const std::vector<cv::Vec3f>& circles, bool fill)
     {
@@ -129,8 +129,8 @@ PARAM_TEST_CASE(HoughCircles, cv::gpu::DeviceInfo, cv::Size, UseRoi)
 
 GPU_TEST_P(HoughCircles, Accuracy)
 {
-    const cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::gpu::setDevice(devInfo.deviceID());
+    const cv::cuda::DeviceInfo devInfo = GET_PARAM(0);
+    cv::cuda::setDevice(devInfo.deviceID());
     const cv::Size size = GET_PARAM(1);
     const bool useRoi = GET_PARAM(2);
 
@@ -150,9 +150,9 @@ GPU_TEST_P(HoughCircles, Accuracy)
     cv::Mat src(size, CV_8UC1);
     drawCircles(src, circles_gold, true);
 
-    cv::Ptr<cv::gpu::HoughCirclesDetector> houghCircles = cv::gpu::createHoughCirclesDetector(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
+    cv::Ptr<cv::cuda::HoughCirclesDetector> houghCircles = cv::cuda::createHoughCirclesDetector(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius);
 
-    cv::gpu::GpuMat d_circles;
+    cv::cuda::GpuMat d_circles;
     houghCircles->detect(loadMat(src, useRoi), d_circles);
 
     std::vector<cv::Vec3f> circles;
@@ -189,14 +189,14 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HoughCircles, testing::Combine(
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // GeneralizedHough
 
-PARAM_TEST_CASE(GeneralizedHough, cv::gpu::DeviceInfo, UseRoi)
+PARAM_TEST_CASE(GeneralizedHough, cv::cuda::DeviceInfo, UseRoi)
 {
 };
 
 GPU_TEST_P(GeneralizedHough, Ballard)
 {
-    const cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
-    cv::gpu::setDevice(devInfo.deviceID());
+    const cv::cuda::DeviceInfo devInfo = GET_PARAM(0);
+    cv::cuda::setDevice(devInfo.deviceID());
     const bool useRoi = GET_PARAM(1);
 
     cv::Mat templ = readImage("../cv/shared/templ.png", cv::IMREAD_GRAYSCALE);
@@ -218,12 +218,12 @@ GPU_TEST_P(GeneralizedHough, Ballard)
         templ.copyTo(imageROI);
     }
 
-    cv::Ptr<cv::GeneralizedHoughBallard> alg = cv::gpu::createGeneralizedHoughBallard();
+    cv::Ptr<cv::GeneralizedHoughBallard> alg = cv::cuda::createGeneralizedHoughBallard();
     alg->setVotesThreshold(200);
 
     alg->setTemplate(loadMat(templ, useRoi));
 
-    cv::gpu::GpuMat d_pos;
+    cv::cuda::GpuMat d_pos;
     alg->detect(loadMat(image, useRoi), d_pos);
 
     std::vector<cv::Vec4f> pos;
diff --git a/modules/gpuimgproc/test/test_match_template.cpp b/modules/gpuimgproc/test/test_match_template.cpp
index b6fd161408..718c5503d2 100644
--- a/modules/gpuimgproc/test/test_match_template.cpp
+++ b/modules/gpuimgproc/test/test_match_template.cpp
@@ -57,9 +57,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
 }
 
-PARAM_TEST_CASE(MatchTemplate8U, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
+PARAM_TEST_CASE(MatchTemplate8U, cv::cuda::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     cv::Size templ_size;
     int cn;
@@ -73,7 +73,7 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Ch
         cn = GET_PARAM(3);
         method = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -82,9 +82,9 @@ GPU_TEST_P(MatchTemplate8U, Accuracy)
     cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
     cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
 
-    cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(image.type(), method);
+    cv::Ptr<cv::cuda::TemplateMatching> alg = cv::cuda::createTemplateMatching(image.type(), method);
 
-    cv::gpu::GpuMat dst;
+    cv::cuda::GpuMat dst;
     alg->match(loadMat(image), loadMat(templ), dst);
 
     cv::Mat dst_gold;
@@ -103,9 +103,9 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // MatchTemplate32F
 
-PARAM_TEST_CASE(MatchTemplate32F, cv::gpu::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
+PARAM_TEST_CASE(MatchTemplate32F, cv::cuda::DeviceInfo, cv::Size, TemplateSize, Channels, TemplateMethod)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     cv::Size templ_size;
     int cn;
@@ -121,7 +121,7 @@ PARAM_TEST_CASE(MatchTemplate32F, cv::gpu::DeviceInfo, cv::Size, TemplateSize, C
         cn = GET_PARAM(3);
         method = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -130,9 +130,9 @@ GPU_TEST_P(MatchTemplate32F, Regression)
     cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
     cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
 
-    cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(image.type(), method);
+    cv::Ptr<cv::cuda::TemplateMatching> alg = cv::cuda::createTemplateMatching(image.type(), method);
 
-    cv::gpu::GpuMat dst;
+    cv::cuda::GpuMat dst;
     alg->match(loadMat(image), loadMat(templ), dst);
 
     cv::Mat dst_gold;
@@ -151,9 +151,9 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // MatchTemplateBlackSource
 
-PARAM_TEST_CASE(MatchTemplateBlackSource, cv::gpu::DeviceInfo, TemplateMethod)
+PARAM_TEST_CASE(MatchTemplateBlackSource, cv::cuda::DeviceInfo, TemplateMethod)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     int method;
 
     virtual void SetUp()
@@ -161,7 +161,7 @@ PARAM_TEST_CASE(MatchTemplateBlackSource, cv::gpu::DeviceInfo, TemplateMethod)
         devInfo = GET_PARAM(0);
         method = GET_PARAM(1);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -173,9 +173,9 @@ GPU_TEST_P(MatchTemplateBlackSource, Accuracy)
     cv::Mat pattern = readImage("matchtemplate/cat.png");
     ASSERT_FALSE(pattern.empty());
 
-    cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(image.type(), method);
+    cv::Ptr<cv::cuda::TemplateMatching> alg = cv::cuda::createTemplateMatching(image.type(), method);
 
-    cv::gpu::GpuMat d_dst;
+    cv::cuda::GpuMat d_dst;
     alg->match(loadMat(image), loadMat(pattern), d_dst);
 
     cv::Mat dst(d_dst);
@@ -196,9 +196,9 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplateBlackSource, testing::Combine(
 ////////////////////////////////////////////////////////////////////////////////
 // MatchTemplate_CCOEF_NORMED
 
-PARAM_TEST_CASE(MatchTemplate_CCOEF_NORMED, cv::gpu::DeviceInfo, std::pair<std::string, std::string>)
+PARAM_TEST_CASE(MatchTemplate_CCOEF_NORMED, cv::cuda::DeviceInfo, std::pair<std::string, std::string>)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     std::string imageName;
     std::string patternName;
 
@@ -208,7 +208,7 @@ PARAM_TEST_CASE(MatchTemplate_CCOEF_NORMED, cv::gpu::DeviceInfo, std::pair<std::
         imageName = GET_PARAM(1).first;
         patternName = GET_PARAM(1).second;
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -220,9 +220,9 @@ GPU_TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy)
     cv::Mat pattern = readImage(patternName);
     ASSERT_FALSE(pattern.empty());
 
-    cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(image.type(), cv::TM_CCOEFF_NORMED);
+    cv::Ptr<cv::cuda::TemplateMatching> alg = cv::cuda::createTemplateMatching(image.type(), cv::TM_CCOEFF_NORMED);
 
-    cv::gpu::GpuMat d_dst;
+    cv::cuda::GpuMat d_dst;
     alg->match(loadMat(image), loadMat(pattern), d_dst);
 
     cv::Mat dst(d_dst);
@@ -251,15 +251,15 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate_CCOEF_NORMED, testing::Combin
 ////////////////////////////////////////////////////////////////////////////////
 // MatchTemplate_CanFindBigTemplate
 
-struct MatchTemplate_CanFindBigTemplate : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct MatchTemplate_CanFindBigTemplate : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -271,9 +271,9 @@ GPU_TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF_NORMED)
     cv::Mat templ = readImage("matchtemplate/template.png");
     ASSERT_FALSE(templ.empty());
 
-    cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(scene.type(), cv::TM_SQDIFF_NORMED);
+    cv::Ptr<cv::cuda::TemplateMatching> alg = cv::cuda::createTemplateMatching(scene.type(), cv::TM_SQDIFF_NORMED);
 
-    cv::gpu::GpuMat d_result;
+    cv::cuda::GpuMat d_result;
     alg->match(loadMat(scene), loadMat(templ), d_result);
 
     cv::Mat result(d_result);
@@ -296,9 +296,9 @@ GPU_TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF)
     cv::Mat templ = readImage("matchtemplate/template.png");
     ASSERT_FALSE(templ.empty());
 
-    cv::Ptr<cv::gpu::TemplateMatching> alg = cv::gpu::createTemplateMatching(scene.type(), cv::TM_SQDIFF);
+    cv::Ptr<cv::cuda::TemplateMatching> alg = cv::cuda::createTemplateMatching(scene.type(), cv::TM_SQDIFF);
 
-    cv::gpu::GpuMat d_result;
+    cv::cuda::GpuMat d_result;
     alg->match(loadMat(scene), loadMat(templ), d_result);
 
     cv::Mat result(d_result);
diff --git a/modules/gpuimgproc/test/test_mean_shift.cpp b/modules/gpuimgproc/test/test_mean_shift.cpp
index e9101802b4..0ab12a63fe 100644
--- a/modules/gpuimgproc/test/test_mean_shift.cpp
+++ b/modules/gpuimgproc/test/test_mean_shift.cpp
@@ -49,9 +49,9 @@ using namespace cvtest;
 ////////////////////////////////////////////////////////////////////////////////
 // MeanShift
 
-struct MeanShift : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct MeanShift : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     cv::Mat img;
 
@@ -62,7 +62,7 @@ struct MeanShift : testing::TestWithParam<cv::gpu::DeviceInfo>
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         img = readImageType("meanshift/cones.png", CV_8UC4);
         ASSERT_FALSE(img.empty());
@@ -75,14 +75,14 @@ struct MeanShift : testing::TestWithParam<cv::gpu::DeviceInfo>
 GPU_TEST_P(MeanShift, Filtering)
 {
     cv::Mat img_template;
-    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
+    if (supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_20))
         img_template = readImage("meanshift/con_result.png");
     else
         img_template = readImage("meanshift/con_result_CC1X.png");
     ASSERT_FALSE(img_template.empty());
 
-    cv::gpu::GpuMat d_dst;
-    cv::gpu::meanShiftFiltering(loadMat(img), d_dst, spatialRad, colorRad);
+    cv::cuda::GpuMat d_dst;
+    cv::cuda::meanShiftFiltering(loadMat(img), d_dst, spatialRad, colorRad);
 
     ASSERT_EQ(CV_8UC4, d_dst.type());
 
@@ -97,7 +97,7 @@ GPU_TEST_P(MeanShift, Filtering)
 GPU_TEST_P(MeanShift, Proc)
 {
     cv::FileStorage fs;
-    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
+    if (supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_20))
         fs.open(std::string(cvtest::TS::ptr()->get_data_path()) + "meanshift/spmap.yaml", cv::FileStorage::READ);
     else
         fs.open(std::string(cvtest::TS::ptr()->get_data_path()) + "meanshift/spmap_CC1X.yaml", cv::FileStorage::READ);
@@ -107,12 +107,12 @@ GPU_TEST_P(MeanShift, Proc)
     fs["spmap"] >> spmap_template;
     ASSERT_FALSE(spmap_template.empty());
 
-    cv::gpu::GpuMat rmap_filtered;
-    cv::gpu::meanShiftFiltering(loadMat(img), rmap_filtered, spatialRad, colorRad);
+    cv::cuda::GpuMat rmap_filtered;
+    cv::cuda::meanShiftFiltering(loadMat(img), rmap_filtered, spatialRad, colorRad);
 
-    cv::gpu::GpuMat rmap;
-    cv::gpu::GpuMat spmap;
-    cv::gpu::meanShiftProc(loadMat(img), rmap, spmap, spatialRad, colorRad);
+    cv::cuda::GpuMat rmap;
+    cv::cuda::GpuMat spmap;
+    cv::cuda::meanShiftProc(loadMat(img), rmap, spmap, spatialRad, colorRad);
 
     ASSERT_EQ(CV_8UC4, rmap.type());
 
@@ -130,9 +130,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(MinSize, int);
 }
 
-PARAM_TEST_CASE(MeanShiftSegmentation, cv::gpu::DeviceInfo, MinSize)
+PARAM_TEST_CASE(MeanShiftSegmentation, cv::cuda::DeviceInfo, MinSize)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     int minsize;
 
     virtual void SetUp()
@@ -140,7 +140,7 @@ PARAM_TEST_CASE(MeanShiftSegmentation, cv::gpu::DeviceInfo, MinSize)
         devInfo = GET_PARAM(0);
         minsize = GET_PARAM(1);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -151,7 +151,7 @@ GPU_TEST_P(MeanShiftSegmentation, Regression)
 
     std::ostringstream path;
     path << "meanshift/cones_segmented_sp10_sr10_minsize" << minsize;
-    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
+    if (supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_20))
         path << ".png";
     else
         path << "_CC1X.png";
@@ -159,7 +159,7 @@ GPU_TEST_P(MeanShiftSegmentation, Regression)
     ASSERT_FALSE(dst_gold.empty());
 
     cv::Mat dst;
-    cv::gpu::meanShiftSegmentation(loadMat(img), dst, 10, 10, minsize);
+    cv::cuda::meanShiftSegmentation(loadMat(img), dst, 10, 10, minsize);
 
     cv::Mat dst_rgb;
     cv::cvtColor(dst, dst_rgb, cv::COLOR_BGRA2BGR);
diff --git a/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp b/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
index 88e2296bea..8fb206d895 100644
--- a/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/NCVPyramid.hpp
@@ -48,7 +48,7 @@
 #include "opencv2/gpulegacy/NCV.hpp"
 #include "opencv2/core/cuda/common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace pyramid
     {
diff --git a/modules/gpulegacy/include/opencv2/gpulegacy/private.hpp b/modules/gpulegacy/include/opencv2/gpulegacy/private.hpp
index 26c76d4dde..5a7d5331df 100644
--- a/modules/gpulegacy/include/opencv2/gpulegacy/private.hpp
+++ b/modules/gpulegacy/include/opencv2/gpulegacy/private.hpp
@@ -56,7 +56,7 @@
 
 #include "opencv2/gpulegacy.hpp"
 
-namespace cv { namespace gpu
+namespace cv { namespace cuda
 {
     class NppStStreamHandler
     {
@@ -88,9 +88,9 @@ namespace cv { namespace gpu
 }}
 
 #if defined(__GNUC__)
-    #define ncvSafeCall(expr)  cv::gpu::checkNcvError(expr, __FILE__, __LINE__, __func__)
+    #define ncvSafeCall(expr)  cv::cuda::checkNcvError(expr, __FILE__, __LINE__, __func__)
 #else /* defined(__CUDACC__) || defined(__MSVC__) */
-    #define ncvSafeCall(expr)  cv::gpu::checkNcvError(expr, __FILE__, __LINE__, "")
+    #define ncvSafeCall(expr)  cv::cuda::checkNcvError(expr, __FILE__, __LINE__, "")
 #endif
 
 #endif // __OPENCV_CORE_GPULEGACY_PRIVATE_HPP__
diff --git a/modules/gpulegacy/src/NCV.cpp b/modules/gpulegacy/src/NCV.cpp
index be82423d08..1596be4975 100644
--- a/modules/gpulegacy/src/NCV.cpp
+++ b/modules/gpulegacy/src/NCV.cpp
@@ -118,7 +118,7 @@ namespace
     const size_t ncv_error_num = sizeof(ncv_errors) / sizeof(ncv_errors[0]);
 }
 
-cv::String cv::gpu::getNcvErrorMessage(int code)
+cv::String cv::cuda::getNcvErrorMessage(int code)
 {
     size_t idx = std::find_if(ncv_errors, ncv_errors + ncv_error_num, ErrorEntryComparer(code)) - ncv_errors;
 
diff --git a/modules/gpulegacy/src/cuda/NCVBroxOpticalFlow.cu b/modules/gpulegacy/src/cuda/NCVBroxOpticalFlow.cu
index d9848ad4b9..1aebd6d5a4 100644
--- a/modules/gpulegacy/src/cuda/NCVBroxOpticalFlow.cu
+++ b/modules/gpulegacy/src/cuda/NCVBroxOpticalFlow.cu
@@ -1140,8 +1140,8 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
                 ScaleVector(ptrVNew->ptr(), ptrVNew->ptr(), 1.0f/scale_factor, ns * nh, stream);
                 ncvAssertCUDALastErrorReturn((int)NCV_CUDA_ERROR);
 
-                cv::gpu::cudev::swap<FloatVector*>(ptrU, ptrUNew);
-                cv::gpu::cudev::swap<FloatVector*>(ptrV, ptrVNew);
+                cv::cuda::cudev::swap<FloatVector*>(ptrU, ptrUNew);
+                cv::cuda::cudev::swap<FloatVector*>(ptrV, ptrVNew);
             }
             scale /= scale_factor;
         }
diff --git a/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu b/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
index c8aaaeaced..6088f80d3b 100644
--- a/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
+++ b/modules/gpulegacy/src/cuda/NCVHaarObjectDetection.cu
@@ -92,13 +92,13 @@ NCV_CT_ASSERT(K_WARP_SIZE == 32); //this is required for the manual unroll of th
 __device__ Ncv32u warpScanInclusive(Ncv32u idata, volatile Ncv32u *s_Data)
 {
 #if __CUDA_ARCH__ >= 300
-    const unsigned int laneId = cv::gpu::cudev::Warp::laneId();
+    const unsigned int laneId = cv::cuda::cudev::Warp::laneId();
 
     // scan on shuffl functions
     #pragma unroll
     for (int i = 1; i <= (K_WARP_SIZE / 2); i *= 2)
     {
-        const Ncv32u n = cv::gpu::cudev::shfl_up(idata, i);
+        const Ncv32u n = cv::cuda::cudev::shfl_up(idata, i);
         if (laneId >= i)
               idata += n;
     }
diff --git a/modules/gpulegacy/src/cuda/NCVPyramid.cu b/modules/gpulegacy/src/cuda/NCVPyramid.cu
index d42b46bcb5..7ba0e2a55d 100644
--- a/modules/gpulegacy/src/cuda/NCVPyramid.cu
+++ b/modules/gpulegacy/src/cuda/NCVPyramid.cu
@@ -205,7 +205,7 @@ __global__ void kernelDownsampleX2(T *d_src,
     }
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace pyramid
     {
@@ -288,7 +288,7 @@ __global__ void kernelInterpolateFrom1(T *d_srcTop,
         d_dst_line[j] = outPix;
     }
 }
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace pyramid
     {
diff --git a/modules/gpulegacy/src/cuda/NPP_staging.cu b/modules/gpulegacy/src/cuda/NPP_staging.cu
index 9234e1795b..e328fae7ad 100644
--- a/modules/gpulegacy/src/cuda/NPP_staging.cu
+++ b/modules/gpulegacy/src/cuda/NPP_staging.cu
@@ -97,13 +97,13 @@ template <class T>
 inline __device__ T warpScanInclusive(T idata, volatile T *s_Data)
 {
 #if __CUDA_ARCH__ >= 300
-    const unsigned int laneId = cv::gpu::cudev::Warp::laneId();
+    const unsigned int laneId = cv::cuda::cudev::Warp::laneId();
 
     // scan on shuffl functions
     #pragma unroll
     for (int i = 1; i <= (K_WARP_SIZE / 2); i *= 2)
     {
-        const T n = cv::gpu::cudev::shfl_up(idata, i);
+        const T n = cv::cuda::cudev::shfl_up(idata, i);
         if (laneId >= i)
               idata += n;
     }
diff --git a/modules/gpulegacy/test/test_main.cpp b/modules/gpulegacy/test/test_main.cpp
index cbb66d2963..0d7215616c 100644
--- a/modules/gpulegacy/test/test_main.cpp
+++ b/modules/gpulegacy/test/test_main.cpp
@@ -46,7 +46,7 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cvtest;
 using namespace testing;
 
diff --git a/modules/gpulegacy/test/test_nvidia.cpp b/modules/gpulegacy/test/test_nvidia.cpp
index 1cda187bce..afa27a56c6 100644
--- a/modules/gpulegacy/test/test_nvidia.cpp
+++ b/modules/gpulegacy/test/test_nvidia.cpp
@@ -49,9 +49,9 @@ OutputLevel nvidiaTestOutputLevel = OutputLevelNone;
 using namespace cvtest;
 using namespace testing;
 
-struct NVidiaTest : TestWithParam<cv::gpu::DeviceInfo>
+struct NVidiaTest : TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     std::string _path;
 
@@ -59,7 +59,7 @@ struct NVidiaTest : TestWithParam<cv::gpu::DeviceInfo>
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
         _path = TS::ptr()->get_data_path().c_str();
         _path = _path + "haarcascade/";
     }
diff --git a/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp b/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
index e9e0719879..0921751f23 100644
--- a/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
+++ b/modules/gpuoptflow/include/opencv2/gpuoptflow.hpp
@@ -49,7 +49,7 @@
 
 #include "opencv2/core/gpu.hpp"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 class CV_EXPORTS BroxOpticalFlow
 {
@@ -306,6 +306,6 @@ CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1,
 
 CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors);
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 #endif /* __OPENCV_GPUOPTFLOW_HPP__ */
diff --git a/modules/gpuoptflow/perf/perf_optflow.cpp b/modules/gpuoptflow/perf/perf_optflow.cpp
index 545225d62e..e249f5ac8a 100644
--- a/modules/gpuoptflow/perf/perf_optflow.cpp
+++ b/modules/gpuoptflow/perf/perf_optflow.cpp
@@ -68,21 +68,21 @@ PERF_TEST_P(ImagePair, InterpolateFrames,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_frame0(frame0);
-        const cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat d_fu, d_fv;
-        cv::gpu::GpuMat d_bu, d_bv;
+        const cv::cuda::GpuMat d_frame0(frame0);
+        const cv::cuda::GpuMat d_frame1(frame1);
+        cv::cuda::GpuMat d_fu, d_fv;
+        cv::cuda::GpuMat d_bu, d_bv;
 
-        cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
+        cv::cuda::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
                                         10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
 
         d_flow(d_frame0, d_frame1, d_fu, d_fv);
         d_flow(d_frame1, d_frame0, d_bu, d_bv);
 
-        cv::gpu::GpuMat newFrame;
-        cv::gpu::GpuMat d_buf;
+        cv::cuda::GpuMat newFrame;
+        cv::cuda::GpuMat d_buf;
 
-        TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf);
+        TEST_CYCLE() cv::cuda::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf);
 
         GPU_SANITY_CHECK(newFrame, 1e-4);
     }
@@ -109,19 +109,19 @@ PERF_TEST_P(ImagePair, CreateOpticalFlowNeedleMap,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_frame0(frame0);
-        const cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat u;
-        cv::gpu::GpuMat v;
+        const cv::cuda::GpuMat d_frame0(frame0);
+        const cv::cuda::GpuMat d_frame1(frame1);
+        cv::cuda::GpuMat u;
+        cv::cuda::GpuMat v;
 
-        cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
+        cv::cuda::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
                                         10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
 
         d_flow(d_frame0, d_frame1, u, v);
 
-        cv::gpu::GpuMat vertex, colors;
+        cv::cuda::GpuMat vertex, colors;
 
-        TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors);
+        TEST_CYCLE() cv::cuda::createOpticalFlowNeedleMap(u, v, vertex, colors);
 
         GPU_SANITY_CHECK(vertex, 1e-6);
         GPU_SANITY_CHECK(colors);
@@ -151,12 +151,12 @@ PERF_TEST_P(ImagePair, BroxOpticalFlow,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_frame0(frame0);
-        const cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat u;
-        cv::gpu::GpuMat v;
+        const cv::cuda::GpuMat d_frame0(frame0);
+        const cv::cuda::GpuMat d_frame1(frame1);
+        cv::cuda::GpuMat u;
+        cv::cuda::GpuMat v;
 
-        cv::gpu::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
+        cv::cuda::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
                                         10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
 
         TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v);
@@ -209,17 +209,17 @@ PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, PyrLKOpticalFlowSparse,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_pts(pts.reshape(2, 1));
+        const cv::cuda::GpuMat d_pts(pts.reshape(2, 1));
 
-        cv::gpu::PyrLKOpticalFlow d_pyrLK;
+        cv::cuda::PyrLKOpticalFlow d_pyrLK;
         d_pyrLK.winSize = cv::Size(winSize, winSize);
         d_pyrLK.maxLevel = levels - 1;
         d_pyrLK.iters = iters;
 
-        const cv::gpu::GpuMat d_frame0(frame0);
-        const cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat nextPts;
-        cv::gpu::GpuMat status;
+        const cv::cuda::GpuMat d_frame0(frame0);
+        const cv::cuda::GpuMat d_frame1(frame1);
+        cv::cuda::GpuMat nextPts;
+        cv::cuda::GpuMat status;
 
         TEST_CYCLE() d_pyrLK.sparse(d_frame0, d_frame1, d_pts, nextPts, status);
 
@@ -269,12 +269,12 @@ PERF_TEST_P(ImagePair_WinSz_Levels_Iters, PyrLKOpticalFlowDense,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_frame0(frame0);
-        const cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat u;
-        cv::gpu::GpuMat v;
+        const cv::cuda::GpuMat d_frame0(frame0);
+        const cv::cuda::GpuMat d_frame1(frame1);
+        cv::cuda::GpuMat u;
+        cv::cuda::GpuMat v;
 
-        cv::gpu::PyrLKOpticalFlow d_pyrLK;
+        cv::cuda::PyrLKOpticalFlow d_pyrLK;
         d_pyrLK.winSize = cv::Size(winSize, winSize);
         d_pyrLK.maxLevel = levels - 1;
         d_pyrLK.iters = iters;
@@ -314,12 +314,12 @@ PERF_TEST_P(ImagePair, FarnebackOpticalFlow,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_frame0(frame0);
-        const cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat u;
-        cv::gpu::GpuMat v;
+        const cv::cuda::GpuMat d_frame0(frame0);
+        const cv::cuda::GpuMat d_frame1(frame1);
+        cv::cuda::GpuMat u;
+        cv::cuda::GpuMat v;
 
-        cv::gpu::FarnebackOpticalFlow d_farneback;
+        cv::cuda::FarnebackOpticalFlow d_farneback;
         d_farneback.numLevels = numLevels;
         d_farneback.pyrScale = pyrScale;
         d_farneback.winSize = winSize;
@@ -359,12 +359,12 @@ PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_frame0(frame0);
-        const cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat u;
-        cv::gpu::GpuMat v;
+        const cv::cuda::GpuMat d_frame0(frame0);
+        const cv::cuda::GpuMat d_frame1(frame1);
+        cv::cuda::GpuMat u;
+        cv::cuda::GpuMat v;
 
-        cv::gpu::OpticalFlowDual_TVL1_GPU d_alg;
+        cv::cuda::OpticalFlowDual_TVL1_GPU d_alg;
 
         TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v);
 
@@ -424,11 +424,11 @@ PERF_TEST_P(ImagePair, OpticalFlowBM,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_frame0(frame0);
-        const cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat u, v, buf;
+        const cv::cuda::GpuMat d_frame0(frame0);
+        const cv::cuda::GpuMat d_frame1(frame1);
+        cv::cuda::GpuMat u, v, buf;
 
-        TEST_CYCLE() cv::gpu::calcOpticalFlowBM(d_frame0, d_frame1, block_size, shift_size, max_range, false, u, v, buf);
+        TEST_CYCLE() cv::cuda::calcOpticalFlowBM(d_frame0, d_frame1, block_size, shift_size, max_range, false, u, v, buf);
 
         GPU_SANITY_CHECK(u);
         GPU_SANITY_CHECK(v);
@@ -461,11 +461,11 @@ PERF_TEST_P(ImagePair, FastOpticalFlowBM,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_frame0(frame0);
-        const cv::gpu::GpuMat d_frame1(frame1);
-        cv::gpu::GpuMat u, v;
+        const cv::cuda::GpuMat d_frame0(frame0);
+        const cv::cuda::GpuMat d_frame1(frame1);
+        cv::cuda::GpuMat u, v;
 
-        cv::gpu::FastOpticalFlowBM fastBM;
+        cv::cuda::FastOpticalFlowBM fastBM;
 
         TEST_CYCLE() fastBM(d_frame0, d_frame1, u, v, max_range.width, block_size.width);
 
diff --git a/modules/gpuoptflow/src/bm.cpp b/modules/gpuoptflow/src/bm.cpp
index b8daa96b09..19ad164f85 100644
--- a/modules/gpuoptflow/src/bm.cpp
+++ b/modules/gpuoptflow/src/bm.cpp
@@ -43,11 +43,11 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::calcOpticalFlowBM(const GpuMat&, const GpuMat&, Size, Size, Size, bool, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::calcOpticalFlowBM(const GpuMat&, const GpuMat&, Size, Size, Size, bool, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 
 #else // HAVE_CUDA
 
@@ -57,7 +57,7 @@ namespace optflowbm
               int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream);
 }
 
-void cv::gpu::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blockSize, Size shiftSize, Size maxRange, bool usePrevious, GpuMat& velx, GpuMat& vely, GpuMat& buf, Stream& st)
+void cv::cuda::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blockSize, Size shiftSize, Size maxRange, bool usePrevious, GpuMat& velx, GpuMat& vely, GpuMat& buf, Stream& st)
 {
     CV_Assert( prev.type() == CV_8UC1 );
     CV_Assert( curr.size() == prev.size() && curr.type() == prev.type() );
diff --git a/modules/gpuoptflow/src/bm_fast.cpp b/modules/gpuoptflow/src/bm_fast.cpp
index edab653861..c418e4bc93 100644
--- a/modules/gpuoptflow/src/bm_fast.cpp
+++ b/modules/gpuoptflow/src/bm_fast.cpp
@@ -43,11 +43,11 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::FastOpticalFlowBM::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); }
 
 #else // HAVE_CUDA
 
@@ -59,7 +59,7 @@ namespace optflowbm_fast
     void calc(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream);
 }
 
-void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window, int block_window, Stream& stream)
+void cv::cuda::FastOpticalFlowBM::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window, int block_window, Stream& stream)
 {
     CV_Assert( I0.type() == CV_8UC1 );
     CV_Assert( I1.size() == I0.size() && I1.type() == I0.type() );
@@ -70,8 +70,8 @@ void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat& I0, const GpuMat& I1,
     ensureSizeIsEnough(esize, I0.type(), extended_I0);
     ensureSizeIsEnough(esize, I0.type(), extended_I1);
 
-    gpu::copyMakeBorder(I0, extended_I0, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
-    gpu::copyMakeBorder(I1, extended_I1, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
+    cuda::copyMakeBorder(I0, extended_I0, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
+    cuda::copyMakeBorder(I1, extended_I1, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream);
 
     GpuMat I0_hdr = extended_I0(Rect(Point2i(border_size, border_size), I0.size()));
     GpuMat I1_hdr = extended_I1(Rect(Point2i(border_size, border_size), I0.size()));
diff --git a/modules/gpuoptflow/src/brox.cpp b/modules/gpuoptflow/src/brox.cpp
index b5db69e2b1..da0ffd4aa1 100644
--- a/modules/gpuoptflow/src/brox.cpp
+++ b/modules/gpuoptflow/src/brox.cpp
@@ -43,11 +43,11 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_GPULEGACY) || defined (CUDA_DISABLER)
 
-void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::BroxOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 
 #else
 
@@ -69,7 +69,7 @@ namespace
     static void outputHandler(const String &msg) { CV_Error(cv::Error::GpuApiCallError, msg.c_str()); }
 }
 
-void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s)
+void cv::cuda::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s)
 {
     ncvSetDebugOutputHandler(outputHandler);
 
diff --git a/modules/gpuoptflow/src/cuda/bm.cu b/modules/gpuoptflow/src/cuda/bm.cu
index 9150d29a14..830bde6f81 100644
--- a/modules/gpuoptflow/src/cuda/bm.cu
+++ b/modules/gpuoptflow/src/cuda/bm.cu
@@ -47,8 +47,8 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "opencv2/core/cuda/reduce.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace optflowbm
 {
diff --git a/modules/gpuoptflow/src/cuda/bm_fast.cu b/modules/gpuoptflow/src/cuda/bm_fast.cu
index 46f78a9f90..1c87f600da 100644
--- a/modules/gpuoptflow/src/cuda/bm_fast.cu
+++ b/modules/gpuoptflow/src/cuda/bm_fast.cu
@@ -47,8 +47,8 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "opencv2/core/cuda/reduce.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace optflowbm_fast
 {
diff --git a/modules/gpuoptflow/src/cuda/farneback.cu b/modules/gpuoptflow/src/cuda/farneback.cu
index 68a58c16d8..34bd64e669 100644
--- a/modules/gpuoptflow/src/cuda/farneback.cu
+++ b/modules/gpuoptflow/src/cuda/farneback.cu
@@ -55,7 +55,7 @@
 #define BORDER_SIZE 5
 #define MAX_KSIZE_HALF 100
 
-namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
+namespace cv { namespace cuda { namespace cudev { namespace optflow_farneback
 {
     __constant__ float c_g[8];
     __constant__ float c_xg[8];
@@ -650,7 +650,7 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
         callers[borderMode](src, ksizeHalf, dst, stream);
     }
 
-}}}} // namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
+}}}} // namespace cv { namespace cuda { namespace cudev { namespace optflow_farneback
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuoptflow/src/cuda/needle_map.cu b/modules/gpuoptflow/src/cuda/needle_map.cu
index e0b1ef6b78..1176ed49f1 100644
--- a/modules/gpuoptflow/src/cuda/needle_map.cu
+++ b/modules/gpuoptflow/src/cuda/needle_map.cu
@@ -44,7 +44,7 @@
 
 #include "opencv2/core/cuda/common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace optical_flow
     {
diff --git a/modules/gpuoptflow/src/cuda/pyrlk.cu b/modules/gpuoptflow/src/cuda/pyrlk.cu
index 410666fcfb..5732e57795 100644
--- a/modules/gpuoptflow/src/cuda/pyrlk.cu
+++ b/modules/gpuoptflow/src/cuda/pyrlk.cu
@@ -49,8 +49,8 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/reduce.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace pyrlk
 {
diff --git a/modules/gpuoptflow/src/cuda/tvl1flow.cu b/modules/gpuoptflow/src/cuda/tvl1flow.cu
index 3d1c612644..b600ae8514 100644
--- a/modules/gpuoptflow/src/cuda/tvl1flow.cu
+++ b/modules/gpuoptflow/src/cuda/tvl1flow.cu
@@ -46,8 +46,8 @@
 #include "opencv2/core/cuda/border_interpolate.hpp"
 #include "opencv2/core/cuda/limits.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 ////////////////////////////////////////////////////////////
 // centeredGradient
diff --git a/modules/gpuoptflow/src/farneback.cpp b/modules/gpuoptflow/src/farneback.cpp
index 9ed6403eea..e5e21e2f4a 100644
--- a/modules/gpuoptflow/src/farneback.cpp
+++ b/modules/gpuoptflow/src/farneback.cpp
@@ -51,15 +51,15 @@
 #define ENABLE_GPU_RESIZE 1
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::FarnebackOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::FarnebackOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 
 #else
 
-namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
+namespace cv { namespace cuda { namespace cudev { namespace optflow_farneback
 {
     void setPolynomialExpansionConsts(
             int polyN, const float *g, const float *xg, const float *xxg,
@@ -93,10 +93,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
     void gaussianBlur5Gpu_CC11(
             const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
 
-}}}} // namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
+}}}} // namespace cv { namespace cuda { namespace cudev { namespace optflow_farneback
 
 
-void cv::gpu::FarnebackOpticalFlow::prepareGaussian(
+void cv::cuda::FarnebackOpticalFlow::prepareGaussian(
         int n, double sigma, float *g, float *xg, float *xxg,
         double &ig11, double &ig03, double &ig33, double &ig55)
 {
@@ -150,7 +150,7 @@ void cv::gpu::FarnebackOpticalFlow::prepareGaussian(
 }
 
 
-void cv::gpu::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma)
+void cv::cuda::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma)
 {
     std::vector<float> buf(n*6 + 3);
     float* g = &buf[0] + n;
@@ -167,7 +167,7 @@ void cv::gpu::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double s
 }
 
 
-void cv::gpu::FarnebackOpticalFlow::updateFlow_boxFilter(
+void cv::cuda::FarnebackOpticalFlow::updateFlow_boxFilter(
         const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
         GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
 {
@@ -186,7 +186,7 @@ void cv::gpu::FarnebackOpticalFlow::updateFlow_boxFilter(
 }
 
 
-void cv::gpu::FarnebackOpticalFlow::updateFlow_gaussianBlur(
+void cv::cuda::FarnebackOpticalFlow::updateFlow_gaussianBlur(
         const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
         GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
 {
@@ -205,7 +205,7 @@ void cv::gpu::FarnebackOpticalFlow::updateFlow_gaussianBlur(
 }
 
 
-void cv::gpu::FarnebackOpticalFlow::operator ()(
+void cv::cuda::FarnebackOpticalFlow::operator ()(
         const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s)
 {
     CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
@@ -247,8 +247,8 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
         pyramid1_[0] = frames_[1];
         for (int i = 1; i <= numLevelsCropped; ++i)
         {
-            gpu::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
-            gpu::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
+            cuda::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
+            cuda::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
         }
     }
 
@@ -291,8 +291,8 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
         {
             if (flags & OPTFLOW_USE_INITIAL_FLOW)
             {
-                gpu::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
-                gpu::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
+                cuda::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
+                cuda::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
                 curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]);
                 curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]);
             }
@@ -304,8 +304,8 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
         }
         else
         {
-            gpu::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
-            gpu::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
+            cuda::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
+            cuda::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
             curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale, streams[0]);
             curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale, streams[1]);
         }
@@ -343,7 +343,7 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
             {
                 cudev::optflow_farneback::gaussianBlurGpu(
                         frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, S(streams[i]));
-                gpu::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]);
+                cuda::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]);
                 cudev::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN, R[i], S(streams[i]));
             }
         }
diff --git a/modules/gpuoptflow/src/interpolate_frames.cpp b/modules/gpuoptflow/src/interpolate_frames.cpp
index f6fe9c510c..57de79ba65 100644
--- a/modules/gpuoptflow/src/interpolate_frames.cpp
+++ b/modules/gpuoptflow/src/interpolate_frames.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_GPULEGACY) || defined (CUDA_DISABLER)
 
-void cv::gpu::interpolateFrames(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::interpolateFrames(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, float, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
 
 #else
 
-void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv,
+void cv::cuda::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, const GpuMat& fu, const GpuMat& fv, const GpuMat& bu, const GpuMat& bv,
                                 float pos, GpuMat& newFrame, GpuMat& buf, Stream& s)
 {
     CV_Assert(frame0.type() == CV_32FC1);
diff --git a/modules/gpuoptflow/src/needle_map.cpp b/modules/gpuoptflow/src/needle_map.cpp
index 9ca8fe5e44..230f26a652 100644
--- a/modules/gpuoptflow/src/needle_map.cpp
+++ b/modules/gpuoptflow/src/needle_map.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::createOpticalFlowNeedleMap(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::createOpticalFlowNeedleMap(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
 
 #else
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace optical_flow
     {
@@ -60,9 +60,9 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors)
+void cv::cuda::createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors)
 {
-    using namespace cv::gpu::cudev::optical_flow;
+    using namespace cv::cuda::cudev::optical_flow;
 
     CV_Assert(u.type() == CV_32FC1);
     CV_Assert(v.type() == u.type() && v.size() == u.size());
@@ -87,14 +87,14 @@ void cv::gpu::createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMa
     colors.setTo(Scalar::all(1.0));
 
     double uMax, vMax;
-    gpu::minMax(u_avg, 0, &uMax);
-    gpu::minMax(v_avg, 0, &vMax);
+    cuda::minMax(u_avg, 0, &uMax);
+    cuda::minMax(v_avg, 0, &vMax);
 
     float max_flow = static_cast<float>(std::sqrt(uMax * uMax + vMax * vMax));
 
     CreateOpticalFlowNeedleMap_gpu(u_avg, v_avg, vertex.ptr<float>(), colors.ptr<float>(), max_flow, 1.0f / u.cols, 1.0f / u.rows);
 
-    gpu::cvtColor(colors, colors, COLOR_HSV2RGB);
+    cuda::cvtColor(colors, colors, COLOR_HSV2RGB);
 }
 
 #endif /* HAVE_CUDA */
diff --git a/modules/gpuoptflow/src/pyrlk.cpp b/modules/gpuoptflow/src/pyrlk.cpp
index 1992bf9038..52ee91f2fe 100644
--- a/modules/gpuoptflow/src/pyrlk.cpp
+++ b/modules/gpuoptflow/src/pyrlk.cpp
@@ -43,14 +43,14 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-cv::gpu::PyrLKOpticalFlow::PyrLKOpticalFlow() { throw_no_cuda(); }
-void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); }
-void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); }
-void cv::gpu::PyrLKOpticalFlow::releaseMemory() {}
+cv::cuda::PyrLKOpticalFlow::PyrLKOpticalFlow() { throw_no_cuda(); }
+void cv::cuda::PyrLKOpticalFlow::sparse(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); }
+void cv::cuda::PyrLKOpticalFlow::dense(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); }
+void cv::cuda::PyrLKOpticalFlow::releaseMemory() {}
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -67,7 +67,7 @@ namespace pyrlk
                PtrStepSzf err, int2 winSize, cudaStream_t stream = 0);
 }
 
-cv::gpu::PyrLKOpticalFlow::PyrLKOpticalFlow()
+cv::cuda::PyrLKOpticalFlow::PyrLKOpticalFlow()
 {
     winSize = Size(21, 21);
     maxLevel = 3;
@@ -97,7 +97,7 @@ namespace
     }
 }
 
-void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err)
+void cv::cuda::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err)
 {
     if (prevPts.empty())
     {
@@ -124,7 +124,7 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
 
     GpuMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
     GpuMat temp2 = nextPts.reshape(1);
-    gpu::multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2);
+    cuda::multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2);
 
     ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
     status.setTo(Scalar::all(1));
@@ -146,17 +146,17 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
     }
     else
     {
-        gpu::cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
+        cuda::cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
         buf_.convertTo(prevPyr_[0], CV_32F);
 
-        gpu::cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
+        cuda::cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
         buf_.convertTo(nextPyr_[0], CV_32F);
     }
 
     for (int level = 1; level <= maxLevel; ++level)
     {
-        gpu::pyrDown(prevPyr_[level - 1], prevPyr_[level]);
-        gpu::pyrDown(nextPyr_[level - 1], nextPyr_[level]);
+        cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level]);
+        cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level]);
     }
 
     pyrlk::loadConstants(make_int2(winSize.width, winSize.height), iters);
@@ -178,7 +178,7 @@ void cv::gpu::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& next
     }
 }
 
-void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err)
+void cv::cuda::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err)
 {
     CV_Assert(prevImg.type() == CV_8UC1);
     CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
@@ -198,8 +198,8 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
 
     for (int level = 1; level <= maxLevel; ++level)
     {
-        gpu::pyrDown(prevPyr_[level - 1], prevPyr_[level]);
-        gpu::pyrDown(nextPyr_[level - 1], nextPyr_[level]);
+        cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level]);
+        cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level]);
     }
 
     ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
@@ -233,7 +233,7 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
     vPyr_[idx].copyTo(v);
 }
 
-void cv::gpu::PyrLKOpticalFlow::releaseMemory()
+void cv::cuda::PyrLKOpticalFlow::releaseMemory()
 {
     prevPyr_.clear();
     nextPyr_.clear();
diff --git a/modules/gpuoptflow/src/tvl1flow.cpp b/modules/gpuoptflow/src/tvl1flow.cpp
index b9ef05ea1d..96d62f1e9b 100644
--- a/modules/gpuoptflow/src/tvl1flow.cpp
+++ b/modules/gpuoptflow/src/tvl1flow.cpp
@@ -44,17 +44,17 @@
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-cv::gpu::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU() { throw_no_cuda(); }
-void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::OpticalFlowDual_TVL1_GPU::collectGarbage() {}
-void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
+cv::cuda::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU() { throw_no_cuda(); }
+void cv::cuda::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::OpticalFlowDual_TVL1_GPU::collectGarbage() {}
+void cv::cuda::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
 
 #else
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
-cv::gpu::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU()
+cv::cuda::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU()
 {
     tau            = 0.25;
     lambda         = 0.15;
@@ -67,7 +67,7 @@ cv::gpu::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU()
     useInitialFlow = false;
 }
 
-void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy)
+void cv::cuda::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy)
 {
     CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 );
     CV_Assert( I0.size() == I1.size() );
@@ -113,8 +113,8 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat& I0, const GpuM
     // create the scales
     for (int s = 1; s < nscales; ++s)
     {
-        gpu::resize(I0s[s-1], I0s[s], Size(), scaleStep, scaleStep);
-        gpu::resize(I1s[s-1], I1s[s], Size(), scaleStep, scaleStep);
+        cuda::resize(I0s[s-1], I0s[s], Size(), scaleStep, scaleStep);
+        cuda::resize(I1s[s-1], I1s[s], Size(), scaleStep, scaleStep);
 
         if (I0s[s].cols < 16 || I0s[s].rows < 16)
         {
@@ -124,11 +124,11 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat& I0, const GpuM
 
         if (useInitialFlow)
         {
-            gpu::resize(u1s[s-1], u1s[s], Size(), scaleStep, scaleStep);
-            gpu::resize(u2s[s-1], u2s[s], Size(), scaleStep, scaleStep);
+            cuda::resize(u1s[s-1], u1s[s], Size(), scaleStep, scaleStep);
+            cuda::resize(u2s[s-1], u2s[s], Size(), scaleStep, scaleStep);
 
-            gpu::multiply(u1s[s], Scalar::all(scaleStep), u1s[s]);
-            gpu::multiply(u2s[s], Scalar::all(scaleStep), u2s[s]);
+            cuda::multiply(u1s[s], Scalar::all(scaleStep), u1s[s]);
+            cuda::multiply(u2s[s], Scalar::all(scaleStep), u2s[s]);
         }
         else
         {
@@ -156,12 +156,12 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat& I0, const GpuM
         // otherwise, upsample the optical flow
 
         // zoom the optical flow for the next finer scale
-        gpu::resize(u1s[s], u1s[s - 1], I0s[s - 1].size());
-        gpu::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());
+        cuda::resize(u1s[s], u1s[s - 1], I0s[s - 1].size());
+        cuda::resize(u2s[s], u2s[s - 1], I0s[s - 1].size());
 
         // scale the optical flow with the appropriate zoom factor
-        gpu::multiply(u1s[s - 1], Scalar::all(1/scaleStep), u1s[s - 1]);
-        gpu::multiply(u2s[s - 1], Scalar::all(1/scaleStep), u2s[s - 1]);
+        cuda::multiply(u1s[s - 1], Scalar::all(1/scaleStep), u1s[s - 1]);
+        cuda::multiply(u2s[s - 1], Scalar::all(1/scaleStep), u2s[s - 1]);
     }
 }
 
@@ -177,7 +177,7 @@ namespace tvl1flow
     void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut);
 }
 
-void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2)
+void cv::cuda::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2)
 {
     using namespace tvl1flow;
 
@@ -223,14 +223,14 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const Gpu
             estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta));
 
             if (epsilon > 0)
-                error = gpu::sum(diff, norm_buf)[0];
+                error = cuda::sum(diff, norm_buf)[0];
 
             estimateDualVariables(u1, u2, p11, p12, p21, p22, taut);
         }
     }
 }
 
-void cv::gpu::OpticalFlowDual_TVL1_GPU::collectGarbage()
+void cv::cuda::OpticalFlowDual_TVL1_GPU::collectGarbage()
 {
     I0s.clear();
     I1s.clear();
diff --git a/modules/gpuoptflow/test/test_optflow.cpp b/modules/gpuoptflow/test/test_optflow.cpp
index fce07551dc..78953eb107 100644
--- a/modules/gpuoptflow/test/test_optflow.cpp
+++ b/modules/gpuoptflow/test/test_optflow.cpp
@@ -52,15 +52,15 @@ using namespace cvtest;
 
 //#define BROX_DUMP
 
-struct BroxOpticalFlow : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct BroxOpticalFlow : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -72,11 +72,11 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
     cv::Mat frame1 = readImageType("opticalflow/frame1.png", CV_32FC1);
     ASSERT_FALSE(frame1.empty());
 
-    cv::gpu::BroxOpticalFlow brox(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
+    cv::cuda::BroxOpticalFlow brox(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
                                   10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/);
 
-    cv::gpu::GpuMat u;
-    cv::gpu::GpuMat v;
+    cv::cuda::GpuMat u;
+    cv::cuda::GpuMat v;
     brox(loadMat(frame0), loadMat(frame1), u, v);
 
     std::string fname(cvtest::TS::ptr()->get_data_path());
@@ -134,11 +134,11 @@ GPU_TEST_P(BroxOpticalFlow, OpticalFlowNan)
     cv::resize(frame0, r_frame0, cv::Size(1380,1000));
     cv::resize(frame1, r_frame1, cv::Size(1380,1000));
 
-    cv::gpu::BroxOpticalFlow brox(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
+    cv::cuda::BroxOpticalFlow brox(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/,
                                   5 /*inner_iterations*/, 150 /*outer_iterations*/, 10 /*solver_iterations*/);
 
-    cv::gpu::GpuMat u;
-    cv::gpu::GpuMat v;
+    cv::cuda::GpuMat u;
+    cv::cuda::GpuMat v;
     brox(loadMat(r_frame0), loadMat(r_frame1), u, v);
 
     cv::Mat h_u, h_v;
@@ -159,9 +159,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(UseGray, bool)
 }
 
-PARAM_TEST_CASE(PyrLKOpticalFlow, cv::gpu::DeviceInfo, UseGray)
+PARAM_TEST_CASE(PyrLKOpticalFlow, cv::cuda::DeviceInfo, UseGray)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     bool useGray;
 
     virtual void SetUp()
@@ -169,7 +169,7 @@ PARAM_TEST_CASE(PyrLKOpticalFlow, cv::gpu::DeviceInfo, UseGray)
         devInfo = GET_PARAM(0);
         useGray = GET_PARAM(1);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -190,14 +190,14 @@ GPU_TEST_P(PyrLKOpticalFlow, Sparse)
     std::vector<cv::Point2f> pts;
     cv::goodFeaturesToTrack(gray_frame, pts, 1000, 0.01, 0.0);
 
-    cv::gpu::GpuMat d_pts;
+    cv::cuda::GpuMat d_pts;
     cv::Mat pts_mat(1, (int) pts.size(), CV_32FC2, (void*) &pts[0]);
     d_pts.upload(pts_mat);
 
-    cv::gpu::PyrLKOpticalFlow pyrLK;
+    cv::cuda::PyrLKOpticalFlow pyrLK;
 
-    cv::gpu::GpuMat d_nextPts;
-    cv::gpu::GpuMat d_status;
+    cv::cuda::GpuMat d_nextPts;
+    cv::cuda::GpuMat d_status;
     pyrLK.sparse(loadMat(frame0), loadMat(frame1), d_pts, d_nextPts, d_status);
 
     std::vector<cv::Point2f> nextPts(d_nextPts.cols);
@@ -256,9 +256,9 @@ namespace
     IMPLEMENT_PARAM_CLASS(UseInitFlow, bool)
 }
 
-PARAM_TEST_CASE(FarnebackOpticalFlow, cv::gpu::DeviceInfo, PyrScale, PolyN, FarnebackOptFlowFlags, UseInitFlow)
+PARAM_TEST_CASE(FarnebackOpticalFlow, cv::cuda::DeviceInfo, PyrScale, PolyN, FarnebackOptFlowFlags, UseInitFlow)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     double pyrScale;
     int polyN;
     int flags;
@@ -272,7 +272,7 @@ PARAM_TEST_CASE(FarnebackOpticalFlow, cv::gpu::DeviceInfo, PyrScale, PolyN, Farn
         flags = GET_PARAM(3);
         useInitFlow = GET_PARAM(4);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -286,13 +286,13 @@ GPU_TEST_P(FarnebackOpticalFlow, Accuracy)
 
     double polySigma = polyN <= 5 ? 1.1 : 1.5;
 
-    cv::gpu::FarnebackOpticalFlow farn;
+    cv::cuda::FarnebackOpticalFlow farn;
     farn.pyrScale = pyrScale;
     farn.polyN = polyN;
     farn.polySigma = polySigma;
     farn.flags = flags;
 
-    cv::gpu::GpuMat d_flowx, d_flowy;
+    cv::cuda::GpuMat d_flowx, d_flowy;
     farn(loadMat(frame0), loadMat(frame1), d_flowx, d_flowy);
 
     cv::Mat flow;
@@ -326,9 +326,9 @@ INSTANTIATE_TEST_CASE_P(GPU_OptFlow, FarnebackOpticalFlow, testing::Combine(
 //////////////////////////////////////////////////////
 // OpticalFlowDual_TVL1
 
-PARAM_TEST_CASE(OpticalFlowDual_TVL1, cv::gpu::DeviceInfo, UseRoi)
+PARAM_TEST_CASE(OpticalFlowDual_TVL1, cv::cuda::DeviceInfo, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     bool useRoi;
 
     virtual void SetUp()
@@ -336,7 +336,7 @@ PARAM_TEST_CASE(OpticalFlowDual_TVL1, cv::gpu::DeviceInfo, UseRoi)
         devInfo = GET_PARAM(0);
         useRoi = GET_PARAM(1);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -348,9 +348,9 @@ GPU_TEST_P(OpticalFlowDual_TVL1, Accuracy)
     cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame1.empty());
 
-    cv::gpu::OpticalFlowDual_TVL1_GPU d_alg;
-    cv::gpu::GpuMat d_flowx = createMat(frame0.size(), CV_32FC1, useRoi);
-    cv::gpu::GpuMat d_flowy = createMat(frame0.size(), CV_32FC1, useRoi);
+    cv::cuda::OpticalFlowDual_TVL1_GPU d_alg;
+    cv::cuda::GpuMat d_flowx = createMat(frame0.size(), CV_32FC1, useRoi);
+    cv::cuda::GpuMat d_flowy = createMat(frame0.size(), CV_32FC1, useRoi);
     d_alg(loadMat(frame0, useRoi), loadMat(frame1, useRoi), d_flowx, d_flowy);
 
     cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
@@ -394,14 +394,14 @@ namespace
     }
 }
 
-struct OpticalFlowBM : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct OpticalFlowBM : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
 };
 
 GPU_TEST_P(OpticalFlowBM, Accuracy)
 {
-    cv::gpu::DeviceInfo devInfo = GetParam();
-    cv::gpu::setDevice(devInfo.deviceID());
+    cv::cuda::DeviceInfo devInfo = GetParam();
+    cv::cuda::setDevice(devInfo.deviceID());
 
     cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame0.empty());
@@ -413,8 +413,8 @@ GPU_TEST_P(OpticalFlowBM, Accuracy)
     cv::Size shift_size(1, 1);
     cv::Size max_range(16, 16);
 
-    cv::gpu::GpuMat d_velx, d_vely, buf;
-    cv::gpu::calcOpticalFlowBM(loadMat(frame0), loadMat(frame1),
+    cv::cuda::GpuMat d_velx, d_vely, buf;
+    cv::cuda::calcOpticalFlowBM(loadMat(frame0), loadMat(frame1),
                                block_size, shift_size, max_range, false,
                                d_velx, d_vely, buf);
 
@@ -497,7 +497,7 @@ namespace
     }
 }
 
-struct FastOpticalFlowBM : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct FastOpticalFlowBM : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
 };
 
@@ -508,8 +508,8 @@ GPU_TEST_P(FastOpticalFlowBM, Accuracy)
     int search_window = 15;
     int block_window = 5;
 
-    cv::gpu::DeviceInfo devInfo = GetParam();
-    cv::gpu::setDevice(devInfo.deviceID());
+    cv::cuda::DeviceInfo devInfo = GetParam();
+    cv::cuda::setDevice(devInfo.deviceID());
 
     cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(frame0.empty());
@@ -524,9 +524,9 @@ GPU_TEST_P(FastOpticalFlowBM, Accuracy)
     cv::resize(frame0, frame0_small, smallSize);
     cv::resize(frame1, frame1_small, smallSize);
 
-    cv::gpu::GpuMat d_flowx;
-    cv::gpu::GpuMat d_flowy;
-    cv::gpu::FastOpticalFlowBM fastBM;
+    cv::cuda::GpuMat d_flowx;
+    cv::cuda::GpuMat d_flowy;
+    cv::cuda::FastOpticalFlowBM fastBM;
 
     fastBM(loadMat(frame0_small), loadMat(frame1_small), d_flowx, d_flowy, search_window, block_window);
 
diff --git a/modules/gpustereo/include/opencv2/gpustereo.hpp b/modules/gpustereo/include/opencv2/gpustereo.hpp
index 250e89b85c..23e43b176c 100644
--- a/modules/gpustereo/include/opencv2/gpustereo.hpp
+++ b/modules/gpustereo/include/opencv2/gpustereo.hpp
@@ -50,7 +50,7 @@
 #include "opencv2/core/gpu.hpp"
 #include "opencv2/calib3d.hpp"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 /////////////////////////////////////////
 // StereoBM
@@ -63,7 +63,7 @@ public:
     virtual void compute(InputArray left, InputArray right, OutputArray disparity, Stream& stream) = 0;
 };
 
-CV_EXPORTS Ptr<gpu::StereoBM> createStereoBM(int numDisparities = 64, int blockSize = 19);
+CV_EXPORTS Ptr<cuda::StereoBM> createStereoBM(int numDisparities = 64, int blockSize = 19);
 
 /////////////////////////////////////////
 // StereoBeliefPropagation
@@ -110,7 +110,7 @@ public:
     static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels);
 };
 
-CV_EXPORTS Ptr<gpu::StereoBeliefPropagation>
+CV_EXPORTS Ptr<cuda::StereoBeliefPropagation>
     createStereoBeliefPropagation(int ndisp = 64, int iters = 5, int levels = 5, int msg_type = CV_32F);
 
 /////////////////////////////////////////
@@ -119,7 +119,7 @@ CV_EXPORTS Ptr<gpu::StereoBeliefPropagation>
 //! "A Constant-Space Belief Propagation Algorithm for Stereo Matching"
 //! Qingxiong Yang, Liang Wang, Narendra Ahuja
 //! http://vision.ai.uiuc.edu/~qyang6/
-class CV_EXPORTS StereoConstantSpaceBP : public gpu::StereoBeliefPropagation
+class CV_EXPORTS StereoConstantSpaceBP : public cuda::StereoBeliefPropagation
 {
 public:
     //! number of active disparity on the first level
@@ -132,7 +132,7 @@ public:
     static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane);
 };
 
-CV_EXPORTS Ptr<gpu::StereoConstantSpaceBP>
+CV_EXPORTS Ptr<cuda::StereoConstantSpaceBP>
     createStereoConstantSpaceBP(int ndisp = 128, int iters = 8, int levels = 4, int nr_plane = 4, int msg_type = CV_32F);
 
 /////////////////////////////////////////
@@ -170,7 +170,7 @@ public:
     virtual void setSigmaRange(double sigma_range) = 0;
 };
 
-CV_EXPORTS Ptr<gpu::DisparityBilateralFilter>
+CV_EXPORTS Ptr<cuda::DisparityBilateralFilter>
     createDisparityBilateralFilter(int ndisp = 64, int radius = 3, int iters = 1);
 
 /////////////////////////////////////////
@@ -188,6 +188,6 @@ CV_EXPORTS void reprojectImageTo3D(InputArray disp, OutputArray xyzw, InputArray
 //! Output disparity has CV_8UC4 type in BGRA format (alpha = 255).
 CV_EXPORTS void drawColorDisp(InputArray src_disp, OutputArray dst_disp, int ndisp, Stream& stream = Stream::Null());
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 #endif /* __OPENCV_GPUSTEREO_HPP__ */
diff --git a/modules/gpustereo/perf/perf_stereo.cpp b/modules/gpustereo/perf/perf_stereo.cpp
index 476a591a1c..632aae79d7 100644
--- a/modules/gpustereo/perf/perf_stereo.cpp
+++ b/modules/gpustereo/perf/perf_stereo.cpp
@@ -67,11 +67,11 @@ PERF_TEST_P(ImagePair, StereoBM,
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::StereoBM> d_bm = cv::gpu::createStereoBM(ndisp);
+        cv::Ptr<cv::StereoBM> d_bm = cv::cuda::createStereoBM(ndisp);
 
-        const cv::gpu::GpuMat d_imgLeft(imgLeft);
-        const cv::gpu::GpuMat d_imgRight(imgRight);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_imgLeft(imgLeft);
+        const cv::cuda::GpuMat d_imgRight(imgRight);
+        cv::cuda::GpuMat dst;
 
         TEST_CYCLE() d_bm->compute(d_imgLeft, d_imgRight, dst);
 
@@ -107,11 +107,11 @@ PERF_TEST_P(ImagePair, StereoBeliefPropagation,
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::gpu::StereoBeliefPropagation> d_bp = cv::gpu::createStereoBeliefPropagation(ndisp);
+        cv::Ptr<cv::cuda::StereoBeliefPropagation> d_bp = cv::cuda::createStereoBeliefPropagation(ndisp);
 
-        const cv::gpu::GpuMat d_imgLeft(imgLeft);
-        const cv::gpu::GpuMat d_imgRight(imgRight);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_imgLeft(imgLeft);
+        const cv::cuda::GpuMat d_imgRight(imgRight);
+        cv::cuda::GpuMat dst;
 
         TEST_CYCLE() d_bp->compute(d_imgLeft, d_imgRight, dst);
 
@@ -141,11 +141,11 @@ PERF_TEST_P(ImagePair, StereoConstantSpaceBP,
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::gpu::StereoConstantSpaceBP> d_csbp = cv::gpu::createStereoConstantSpaceBP(ndisp);
+        cv::Ptr<cv::cuda::StereoConstantSpaceBP> d_csbp = cv::cuda::createStereoConstantSpaceBP(ndisp);
 
-        const cv::gpu::GpuMat d_imgLeft(imgLeft);
-        const cv::gpu::GpuMat d_imgRight(imgRight);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_imgLeft(imgLeft);
+        const cv::cuda::GpuMat d_imgRight(imgRight);
+        cv::cuda::GpuMat dst;
 
         TEST_CYCLE() d_csbp->compute(d_imgLeft, d_imgRight, dst);
 
@@ -173,11 +173,11 @@ PERF_TEST_P(ImagePair, DisparityBilateralFilter,
 
     if (PERF_RUN_GPU())
     {
-        cv::Ptr<cv::gpu::DisparityBilateralFilter> d_filter = cv::gpu::createDisparityBilateralFilter(ndisp);
+        cv::Ptr<cv::cuda::DisparityBilateralFilter> d_filter = cv::cuda::createDisparityBilateralFilter(ndisp);
 
-        const cv::gpu::GpuMat d_img(img);
-        const cv::gpu::GpuMat d_disp(disp);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_img(img);
+        const cv::cuda::GpuMat d_disp(disp);
+        cv::cuda::GpuMat dst;
 
         TEST_CYCLE() d_filter->apply(d_disp, d_img, dst);
 
@@ -207,10 +207,10 @@ PERF_TEST_P(Sz_Depth, ReprojectImageTo3D,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::reprojectImageTo3D(d_src, dst, Q);
+        TEST_CYCLE() cv::cuda::reprojectImageTo3D(d_src, dst, Q);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -239,10 +239,10 @@ PERF_TEST_P(Sz_Depth, DrawColorDisp,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::drawColorDisp(d_src, dst, 255);
+        TEST_CYCLE() cv::cuda::drawColorDisp(d_src, dst, 255);
 
         GPU_SANITY_CHECK(dst);
     }
diff --git a/modules/gpustereo/src/cuda/disparity_bilateral_filter.cu b/modules/gpustereo/src/cuda/disparity_bilateral_filter.cu
index cfea880ecd..ce9239479c 100644
--- a/modules/gpustereo/src/cuda/disparity_bilateral_filter.cu
+++ b/modules/gpustereo/src/cuda/disparity_bilateral_filter.cu
@@ -45,7 +45,7 @@
 #include "opencv2/core/cuda/common.hpp"
 #include "opencv2/core/cuda/limits.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace disp_bilateral_filter
     {
@@ -218,6 +218,6 @@ namespace cv { namespace gpu { namespace cudev
         template void disp_bilateral_filter<uchar>(PtrStepSz<uchar> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
         template void disp_bilateral_filter<short>(PtrStepSz<short> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
     } // namespace bilateral_filter
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpustereo/src/cuda/stereobm.cu b/modules/gpustereo/src/cuda/stereobm.cu
index 920616b72e..e5afa7963d 100644
--- a/modules/gpustereo/src/cuda/stereobm.cu
+++ b/modules/gpustereo/src/cuda/stereobm.cu
@@ -44,7 +44,7 @@
 
 #include "opencv2/core/cuda/common.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace stereobm
     {
@@ -534,7 +534,7 @@ namespace cv { namespace gpu { namespace cudev
             cudaSafeCall( cudaUnbindTexture (texForTF) );
         }
     } // namespace stereobm
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpustereo/src/cuda/stereobp.cu b/modules/gpustereo/src/cuda/stereobp.cu
index ff437edc27..13121bb178 100644
--- a/modules/gpustereo/src/cuda/stereobp.cu
+++ b/modules/gpustereo/src/cuda/stereobp.cu
@@ -46,7 +46,7 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/limits.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace stereobp
     {
@@ -533,6 +533,6 @@ namespace cv { namespace gpu { namespace cudev
         template void output_gpu<short>(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
         template void output_gpu<float>(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
     } // namespace stereobp
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpustereo/src/cuda/stereocsbp.cu b/modules/gpustereo/src/cuda/stereocsbp.cu
index cb1340a86d..371aea5405 100644
--- a/modules/gpustereo/src/cuda/stereocsbp.cu
+++ b/modules/gpustereo/src/cuda/stereocsbp.cu
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/reduce.hpp"
 #include "opencv2/core/cuda/functional.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace stereocsbp
     {
@@ -859,6 +859,6 @@ namespace cv { namespace gpu { namespace cudev
         template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,
             const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream);
     } // namespace stereocsbp
-}}} // namespace cv { namespace gpu { namespace cudev {
+}}} // namespace cv { namespace cuda { namespace cudev {
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpustereo/src/cuda/util.cu b/modules/gpustereo/src/cuda/util.cu
index 1945d2463a..a492c5b527 100644
--- a/modules/gpustereo/src/cuda/util.cu
+++ b/modules/gpustereo/src/cuda/util.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "opencv2/core/cuda/reduce.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     /////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
 
@@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace cudev
         if (stream == 0)
             cudaSafeCall( cudaDeviceSynchronize() );
     }
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpustereo/src/disparity_bilateral_filter.cpp b/modules/gpustereo/src/disparity_bilateral_filter.cpp
index 689a9e76e3..8016f9af5c 100644
--- a/modules/gpustereo/src/disparity_bilateral_filter.cpp
+++ b/modules/gpustereo/src/disparity_bilateral_filter.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-Ptr<gpu::DisparityBilateralFilter> cv::gpu::createDisparityBilateralFilter(int, int, int) { throw_no_cuda(); return Ptr<gpu::DisparityBilateralFilter>(); }
+Ptr<cuda::DisparityBilateralFilter> cv::cuda::createDisparityBilateralFilter(int, int, int) { throw_no_cuda(); return Ptr<cuda::DisparityBilateralFilter>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace disp_bilateral_filter
     {
@@ -64,7 +64,7 @@ namespace cv { namespace gpu { namespace cudev
 
 namespace
 {
-    class DispBilateralFilterImpl : public gpu::DisparityBilateralFilter
+    class DispBilateralFilterImpl : public cuda::DisparityBilateralFilter
     {
     public:
         DispBilateralFilterImpl(int ndisp, int radius, int iters);
@@ -160,7 +160,7 @@ namespace
                                         const GpuMat& disp, const GpuMat& img,
                                         OutputArray _dst, Stream& stream)
     {
-        using namespace cv::gpu::cudev::disp_bilateral_filter;
+        using namespace cv::cuda::cudev::disp_bilateral_filter;
 
         const short edge_disc = std::max<short>(short(1), short(ndisp * edge_threshold + 0.5));
         const short max_disc = short(ndisp * max_disc_threshold + 0.5);
@@ -198,7 +198,7 @@ namespace
     }
 }
 
-Ptr<gpu::DisparityBilateralFilter> cv::gpu::createDisparityBilateralFilter(int ndisp, int radius, int iters)
+Ptr<cuda::DisparityBilateralFilter> cv::cuda::createDisparityBilateralFilter(int ndisp, int radius, int iters)
 {
     return new DispBilateralFilterImpl(ndisp, radius, iters);
 }
diff --git a/modules/gpustereo/src/stereobm.cpp b/modules/gpustereo/src/stereobm.cpp
index 30773a6159..fe52b06511 100644
--- a/modules/gpustereo/src/stereobm.cpp
+++ b/modules/gpustereo/src/stereobm.cpp
@@ -43,15 +43,15 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-Ptr<gpu::StereoBM> cv::gpu::createStereoBM(int, int) { throw_no_cuda(); return Ptr<gpu::StereoBM>(); }
+Ptr<cuda::StereoBM> cv::cuda::createStereoBM(int, int) { throw_no_cuda(); return Ptr<cuda::StereoBM>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace stereobm
     {
@@ -63,7 +63,7 @@ namespace cv { namespace gpu { namespace cudev
 
 namespace
 {
-    class StereoBMImpl : public gpu::StereoBM
+    class StereoBMImpl : public cuda::StereoBM
     {
     public:
         StereoBMImpl(int numDisparities, int blockSize);
@@ -135,7 +135,7 @@ namespace
 
     void StereoBMImpl::compute(InputArray _left, InputArray _right, OutputArray _disparity, Stream& _stream)
     {
-        using namespace ::cv::gpu::cudev::stereobm;
+        using namespace ::cv::cuda::cudev::stereobm;
 
         const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8);
         CV_Assert( 0 < ndisp_ && ndisp_ <= max_supported_ndisp );
@@ -153,15 +153,15 @@ namespace
 
         cudaStream_t stream = StreamAccessor::getStream(_stream);
 
-        gpu::ensureSizeIsEnough(left.size(), CV_32SC1, minSSD_);
+        cuda::ensureSizeIsEnough(left.size(), CV_32SC1, minSSD_);
 
         PtrStepSzb le_for_bm =  left;
         PtrStepSzb ri_for_bm = right;
 
         if (preset_ == cv::StereoBM::PREFILTER_XSOBEL)
         {
-            gpu::ensureSizeIsEnough(left.size(), left.type(), leBuf_);
-            gpu::ensureSizeIsEnough(right.size(), right.type(), riBuf_);
+            cuda::ensureSizeIsEnough(left.size(), left.type(), leBuf_);
+            cuda::ensureSizeIsEnough(right.size(), right.type(), riBuf_);
 
             prefilter_xsobel( left, leBuf_, preFilterCap_, stream);
             prefilter_xsobel(right, riBuf_, preFilterCap_, stream);
@@ -177,7 +177,7 @@ namespace
     }
 }
 
-Ptr<gpu::StereoBM> cv::gpu::createStereoBM(int numDisparities, int blockSize)
+Ptr<cuda::StereoBM> cv::cuda::createStereoBM(int numDisparities, int blockSize)
 {
     return new StereoBMImpl(numDisparities, blockSize);
 }
diff --git a/modules/gpustereo/src/stereobp.cpp b/modules/gpustereo/src/stereobp.cpp
index ac3bcfe339..fb251bb7a7 100644
--- a/modules/gpustereo/src/stereobp.cpp
+++ b/modules/gpustereo/src/stereobp.cpp
@@ -43,17 +43,17 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int, int, int&, int&, int&) { throw_no_cuda(); }
+void cv::cuda::StereoBeliefPropagation::estimateRecommendedParams(int, int, int&, int&, int&) { throw_no_cuda(); }
 
-Ptr<gpu::StereoBeliefPropagation> cv::gpu::createStereoBeliefPropagation(int, int, int, int) { throw_no_cuda(); return Ptr<gpu::StereoBeliefPropagation>(); }
+Ptr<cuda::StereoBeliefPropagation> cv::cuda::createStereoBeliefPropagation(int, int, int, int) { throw_no_cuda(); return Ptr<cuda::StereoBeliefPropagation>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace stereobp
     {
@@ -75,7 +75,7 @@ namespace cv { namespace gpu { namespace cudev
 
 namespace
 {
-    class StereoBPImpl : public gpu::StereoBeliefPropagation
+    class StereoBPImpl : public cuda::StereoBeliefPropagation
     {
     public:
         StereoBPImpl(int ndisp, int iters, int levels, int msg_type);
@@ -164,7 +164,7 @@ namespace
 
     void StereoBPImpl::compute(InputArray _left, InputArray _right, OutputArray disparity, Stream& stream)
     {
-        using namespace cv::gpu::cudev::stereobp;
+        using namespace cv::cuda::cudev::stereobp;
 
         typedef void (*comp_data_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream);
         static const comp_data_t comp_data_callers[2][5] =
@@ -233,7 +233,7 @@ namespace
 
     void StereoBPImpl::init(Stream& stream)
     {
-        using namespace cv::gpu::cudev::stereobp;
+        using namespace cv::cuda::cudev::stereobp;
 
         u_.create(rows_ * ndisp_, cols_, msg_type_);
         d_.create(rows_ * ndisp_, cols_, msg_type_);
@@ -281,7 +281,7 @@ namespace
 
     void StereoBPImpl::calcBP(OutputArray disp, Stream& _stream)
     {
-        using namespace cv::gpu::cudev::stereobp;
+        using namespace cv::cuda::cudev::stereobp;
 
         typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
         static const data_step_down_t data_step_down_callers[2] =
@@ -359,12 +359,12 @@ namespace
     }
 }
 
-Ptr<gpu::StereoBeliefPropagation> cv::gpu::createStereoBeliefPropagation(int ndisp, int iters, int levels, int msg_type)
+Ptr<cuda::StereoBeliefPropagation> cv::cuda::createStereoBeliefPropagation(int ndisp, int iters, int levels, int msg_type)
 {
     return new StereoBPImpl(ndisp, iters, levels, msg_type);
 }
 
-void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels)
+void cv::cuda::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels)
 {
     ndisp = width / 4;
     if ((ndisp & 1) != 0)
diff --git a/modules/gpustereo/src/stereocsbp.cpp b/modules/gpustereo/src/stereocsbp.cpp
index 9afd8d14e2..925002d8e2 100644
--- a/modules/gpustereo/src/stereocsbp.cpp
+++ b/modules/gpustereo/src/stereocsbp.cpp
@@ -43,17 +43,17 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int, int, int&, int&, int&, int&) { throw_no_cuda(); }
+void cv::cuda::StereoConstantSpaceBP::estimateRecommendedParams(int, int, int&, int&, int&, int&) { throw_no_cuda(); }
 
-Ptr<gpu::StereoConstantSpaceBP> cv::gpu::createStereoConstantSpaceBP(int, int, int, int, int) { throw_no_cuda(); return Ptr<gpu::StereoConstantSpaceBP>(); }
+Ptr<cuda::StereoConstantSpaceBP> cv::cuda::createStereoConstantSpaceBP(int, int, int, int, int) { throw_no_cuda(); return Ptr<cuda::StereoConstantSpaceBP>(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace stereocsbp
     {
@@ -87,7 +87,7 @@ namespace cv { namespace gpu { namespace cudev
 
 namespace
 {
-    class StereoCSBPImpl : public gpu::StereoConstantSpaceBP
+    class StereoCSBPImpl : public cuda::StereoConstantSpaceBP
     {
     public:
         StereoCSBPImpl(int ndisp, int iters, int levels, int nr_plane, int msg_type);
@@ -179,7 +179,7 @@ namespace
 
     void StereoCSBPImpl::compute(InputArray _left, InputArray _right, OutputArray disp, Stream& _stream)
     {
-        using namespace cv::gpu::cudev::stereocsbp;
+        using namespace cv::cuda::cudev::stereocsbp;
 
         CV_Assert( msg_type_ == CV_32F || msg_type_ == CV_16S );
         CV_Assert( 0 < ndisp_ && 0 < iters_ && 0 < levels_ && 0 < nr_plane_ && levels_ <= 8 );
@@ -364,12 +364,12 @@ namespace
     }
 }
 
-Ptr<gpu::StereoConstantSpaceBP> cv::gpu::createStereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, int msg_type)
+Ptr<cuda::StereoConstantSpaceBP> cv::cuda::createStereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, int msg_type)
 {
     return new StereoCSBPImpl(ndisp, iters, levels, nr_plane, msg_type);
 }
 
-void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane)
+void cv::cuda::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane)
 {
     ndisp = (int) ((float) width / 3.14f);
     if ((ndisp & 1) != 0)
diff --git a/modules/gpustereo/src/util.cpp b/modules/gpustereo/src/util.cpp
index e58b5a18e0..87543f9bcf 100644
--- a/modules/gpustereo/src/util.cpp
+++ b/modules/gpustereo/src/util.cpp
@@ -43,27 +43,27 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::reprojectImageTo3D(InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::drawColorDisp(InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::reprojectImageTo3D(InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::drawColorDisp(InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
 
 #else
 
 ////////////////////////////////////////////////////////////////////////
 // reprojectImageTo3D
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T, typename D>
     void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
 }}}
 
-void cv::gpu::reprojectImageTo3D(InputArray _disp, OutputArray _xyz, InputArray _Q, int dst_cn, Stream& stream)
+void cv::cuda::reprojectImageTo3D(InputArray _disp, OutputArray _xyz, InputArray _Q, int dst_cn, Stream& stream)
 {
-    using namespace cv::gpu::cudev;
+    using namespace cv::cuda::cudev;
 
     typedef void (*func_t)(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
     static const func_t funcs[2][4] =
@@ -88,7 +88,7 @@ void cv::gpu::reprojectImageTo3D(InputArray _disp, OutputArray _xyz, InputArray
 ////////////////////////////////////////////////////////////////////////
 // drawColorDisp
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream);
     void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream);
@@ -99,7 +99,7 @@ namespace
     template <typename T>
     void drawColorDisp_caller(const GpuMat& src, OutputArray _dst, int ndisp, const cudaStream_t& stream)
     {
-        using namespace ::cv::gpu::cudev;
+        using namespace ::cv::cuda::cudev;
 
         _dst.create(src.size(), CV_8UC4);
         GpuMat dst = _dst.getGpuMat();
@@ -108,7 +108,7 @@ namespace
     }
 }
 
-void cv::gpu::drawColorDisp(InputArray _src, OutputArray dst, int ndisp, Stream& stream)
+void cv::cuda::drawColorDisp(InputArray _src, OutputArray dst, int ndisp, Stream& stream)
 {
     typedef void (*drawColorDisp_caller_t)(const GpuMat& src, OutputArray dst, int ndisp, const cudaStream_t& stream);
     const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller<unsigned char>, 0, 0, drawColorDisp_caller<short>, 0, 0, 0, 0};
diff --git a/modules/gpustereo/test/test_stereo.cpp b/modules/gpustereo/test/test_stereo.cpp
index 9a3d94627e..b1398e881b 100644
--- a/modules/gpustereo/test/test_stereo.cpp
+++ b/modules/gpustereo/test/test_stereo.cpp
@@ -49,15 +49,15 @@ using namespace cvtest;
 //////////////////////////////////////////////////////////////////////////
 // StereoBM
 
-struct StereoBM : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct StereoBM : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -71,8 +71,8 @@ GPU_TEST_P(StereoBM, Regression)
     ASSERT_FALSE(right_image.empty());
     ASSERT_FALSE(disp_gold.empty());
 
-    cv::Ptr<cv::StereoBM> bm = cv::gpu::createStereoBM(128, 19);
-    cv::gpu::GpuMat disp;
+    cv::Ptr<cv::StereoBM> bm = cv::cuda::createStereoBM(128, 19);
+    cv::cuda::GpuMat disp;
 
     bm->compute(loadMat(left_image), loadMat(right_image), disp);
 
@@ -84,15 +84,15 @@ INSTANTIATE_TEST_CASE_P(GPU_Stereo, StereoBM, ALL_DEVICES);
 //////////////////////////////////////////////////////////////////////////
 // StereoBeliefPropagation
 
-struct StereoBeliefPropagation : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct StereoBeliefPropagation : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -106,13 +106,13 @@ GPU_TEST_P(StereoBeliefPropagation, Regression)
     ASSERT_FALSE(right_image.empty());
     ASSERT_FALSE(disp_gold.empty());
 
-    cv::Ptr<cv::gpu::StereoBeliefPropagation> bp = cv::gpu::createStereoBeliefPropagation(64, 8, 2, CV_16S);
+    cv::Ptr<cv::cuda::StereoBeliefPropagation> bp = cv::cuda::createStereoBeliefPropagation(64, 8, 2, CV_16S);
     bp->setMaxDataTerm(25.0);
     bp->setDataWeight(0.1);
     bp->setMaxDiscTerm(15.0);
     bp->setDiscSingleJump(1.0);
 
-    cv::gpu::GpuMat disp;
+    cv::cuda::GpuMat disp;
 
     bp->compute(loadMat(left_image), loadMat(right_image), disp);
 
@@ -127,15 +127,15 @@ INSTANTIATE_TEST_CASE_P(GPU_Stereo, StereoBeliefPropagation, ALL_DEVICES);
 //////////////////////////////////////////////////////////////////////////
 // StereoConstantSpaceBP
 
-struct StereoConstantSpaceBP : testing::TestWithParam<cv::gpu::DeviceInfo>
+struct StereoConstantSpaceBP : testing::TestWithParam<cv::cuda::DeviceInfo>
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
 
     virtual void SetUp()
     {
         devInfo = GetParam();
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -146,7 +146,7 @@ GPU_TEST_P(StereoConstantSpaceBP, Regression)
 
     cv::Mat disp_gold;
 
-    if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
+    if (supportFeature(devInfo, cv::cuda::FEATURE_SET_COMPUTE_20))
         disp_gold = readImage("csstereobp/aloe-disp.png", cv::IMREAD_GRAYSCALE);
     else
         disp_gold = readImage("csstereobp/aloe-disp_CC1X.png", cv::IMREAD_GRAYSCALE);
@@ -155,8 +155,8 @@ GPU_TEST_P(StereoConstantSpaceBP, Regression)
     ASSERT_FALSE(right_image.empty());
     ASSERT_FALSE(disp_gold.empty());
 
-    cv::Ptr<cv::gpu::StereoConstantSpaceBP> csbp = cv::gpu::createStereoConstantSpaceBP(128, 16, 4, 4);
-    cv::gpu::GpuMat disp;
+    cv::Ptr<cv::cuda::StereoConstantSpaceBP> csbp = cv::cuda::createStereoConstantSpaceBP(128, 16, 4, 4);
+    cv::cuda::GpuMat disp;
 
     csbp->compute(loadMat(left_image), loadMat(right_image), disp);
 
@@ -171,9 +171,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Stereo, StereoConstantSpaceBP, ALL_DEVICES);
 ////////////////////////////////////////////////////////////////////////////////
 // reprojectImageTo3D
 
-PARAM_TEST_CASE(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
+PARAM_TEST_CASE(ReprojectImageTo3D, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int depth;
     bool useRoi;
@@ -185,7 +185,7 @@ PARAM_TEST_CASE(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, MatDepth, Use
         depth = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -194,8 +194,8 @@ GPU_TEST_P(ReprojectImageTo3D, Accuracy)
     cv::Mat disp = randomMat(size, depth, 5.0, 30.0);
     cv::Mat Q = randomMat(cv::Size(4, 4), CV_32FC1, 0.1, 1.0);
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::reprojectImageTo3D(loadMat(disp, useRoi), dst, Q, 3);
+    cv::cuda::GpuMat dst;
+    cv::cuda::reprojectImageTo3D(loadMat(disp, useRoi), dst, Q, 3);
 
     cv::Mat dst_gold;
     cv::reprojectImageTo3D(disp, dst_gold, Q, false);
diff --git a/modules/gpuwarping/include/opencv2/gpuwarping.hpp b/modules/gpuwarping/include/opencv2/gpuwarping.hpp
index ed17464ec4..80346ab42b 100644
--- a/modules/gpuwarping/include/opencv2/gpuwarping.hpp
+++ b/modules/gpuwarping/include/opencv2/gpuwarping.hpp
@@ -50,7 +50,7 @@
 #include "opencv2/core/gpu.hpp"
 #include "opencv2/imgproc.hpp"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 //! DST[x,y] = SRC[xmap[x,y],ymap[x,y]]
 //! supports only CV_32FC1 map type
@@ -108,6 +108,6 @@ public:
 
 CV_EXPORTS Ptr<ImagePyramid> createImagePyramid(InputArray img, int nLayers = -1, Stream& stream = Stream::Null());
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 #endif /* __OPENCV_GPUWARPING_HPP__ */
diff --git a/modules/gpuwarping/perf/perf_warping.cpp b/modules/gpuwarping/perf/perf_warping.cpp
index 266475bb0c..1a07b6071a 100644
--- a/modules/gpuwarping/perf/perf_warping.cpp
+++ b/modules/gpuwarping/perf/perf_warping.cpp
@@ -119,12 +119,12 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, Remap,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        const cv::gpu::GpuMat d_xmap(xmap);
-        const cv::gpu::GpuMat d_ymap(ymap);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        const cv::cuda::GpuMat d_xmap(xmap);
+        const cv::cuda::GpuMat d_ymap(ymap);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::remap(d_src, dst, d_xmap, d_ymap, interpolation, borderMode);
+        TEST_CYCLE() cv::cuda::remap(d_src, dst, d_xmap, d_ymap, interpolation, borderMode);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -165,10 +165,10 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, Resize,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::resize(d_src, dst, cv::Size(), f, f, interpolation);
+        TEST_CYCLE() cv::cuda::resize(d_src, dst, cv::Size(), f, f, interpolation);
 
         GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE);
     }
@@ -208,10 +208,10 @@ PERF_TEST_P(Sz_Depth_Cn_Scale, ResizeArea,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::resize(d_src, dst, cv::Size(), f, f, interpolation);
+        TEST_CYCLE() cv::cuda::resize(d_src, dst, cv::Size(), f, f, interpolation);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -260,10 +260,10 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border, WarpAffine,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::warpAffine(d_src, dst, M, size, interpolation, borderMode);
+        TEST_CYCLE() cv::cuda::warpAffine(d_src, dst, M, size, interpolation, borderMode);
 
         GPU_SANITY_CHECK(dst, 1);
     }
@@ -308,10 +308,10 @@ PERF_TEST_P(Sz_Depth_Cn_Inter_Border, WarpPerspective,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::warpPerspective(d_src, dst, M, size, interpolation, borderMode);
+        TEST_CYCLE() cv::cuda::warpPerspective(d_src, dst, M, size, interpolation, borderMode);
 
         GPU_SANITY_CHECK(dst, 1);
     }
@@ -339,10 +339,10 @@ PERF_TEST_P(Sz, BuildWarpPlaneMaps,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat map_x;
-        cv::gpu::GpuMat map_y;
+        cv::cuda::GpuMat map_x;
+        cv::cuda::GpuMat map_y;
 
-        TEST_CYCLE() cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, map_x, map_y);
+        TEST_CYCLE() cv::cuda::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, map_x, map_y);
 
         GPU_SANITY_CHECK(map_x);
         GPU_SANITY_CHECK(map_y);
@@ -366,10 +366,10 @@ PERF_TEST_P(Sz, BuildWarpCylindricalMaps,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat map_x;
-        cv::gpu::GpuMat map_y;
+        cv::cuda::GpuMat map_x;
+        cv::cuda::GpuMat map_y;
 
-        TEST_CYCLE() cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
+        TEST_CYCLE() cv::cuda::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
 
         GPU_SANITY_CHECK(map_x);
         GPU_SANITY_CHECK(map_y);
@@ -393,10 +393,10 @@ PERF_TEST_P(Sz, BuildWarpSphericalMaps,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::GpuMat map_x;
-        cv::gpu::GpuMat map_y;
+        cv::cuda::GpuMat map_x;
+        cv::cuda::GpuMat map_y;
 
-        TEST_CYCLE() cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
+        TEST_CYCLE() cv::cuda::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y);
 
         GPU_SANITY_CHECK(map_x);
         GPU_SANITY_CHECK(map_y);
@@ -430,10 +430,10 @@ PERF_TEST_P(Sz_Depth_Cn_Inter, Rotate,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::rotate(d_src, dst, size, 30.0, 0, 0, interpolation);
+        TEST_CYCLE() cv::cuda::rotate(d_src, dst, size, 30.0, 0, 0, interpolation);
 
         GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE);
     }
@@ -462,10 +462,10 @@ PERF_TEST_P(Sz_Depth_Cn, PyrDown,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::pyrDown(d_src, dst);
+        TEST_CYCLE() cv::cuda::pyrDown(d_src, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -498,10 +498,10 @@ PERF_TEST_P(Sz_Depth_Cn, PyrUp,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::pyrUp(d_src, dst);
+        TEST_CYCLE() cv::cuda::pyrUp(d_src, dst);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -537,10 +537,10 @@ PERF_TEST_P(Sz_Depth_Cn, ImagePyramidGetLayer,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        cv::Ptr<cv::gpu::ImagePyramid> d_pyr = cv::gpu::createImagePyramid(d_src, nLayers);
+        cv::Ptr<cv::cuda::ImagePyramid> d_pyr = cv::cuda::createImagePyramid(d_src, nLayers);
 
         TEST_CYCLE() d_pyr->getLayer(dst, dstSize);
 
diff --git a/modules/gpuwarping/src/cuda/build_warp_maps.cu b/modules/gpuwarping/src/cuda/build_warp_maps.cu
index 6bd4e335bb..698c92a070 100644
--- a/modules/gpuwarping/src/cuda/build_warp_maps.cu
+++ b/modules/gpuwarping/src/cuda/build_warp_maps.cu
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -215,7 +215,7 @@ namespace cv { namespace gpu { namespace cudev
                 cudaSafeCall(cudaDeviceSynchronize());
         }
     } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cudev {
+}}} // namespace cv { namespace cuda { namespace cudev {
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuwarping/src/cuda/pyr_down.cu b/modules/gpuwarping/src/cuda/pyr_down.cu
index 904f549bad..57ee868410 100644
--- a/modules/gpuwarping/src/cuda/pyr_down.cu
+++ b/modules/gpuwarping/src/cuda/pyr_down.cu
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/saturate_cast.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -222,7 +222,7 @@ namespace cv { namespace gpu { namespace cudev
         template void pyrDown_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
         template void pyrDown_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
     } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuwarping/src/cuda/pyr_up.cu b/modules/gpuwarping/src/cuda/pyr_up.cu
index 36a72274cf..f6a9daba85 100644
--- a/modules/gpuwarping/src/cuda/pyr_up.cu
+++ b/modules/gpuwarping/src/cuda/pyr_up.cu
@@ -48,7 +48,7 @@
 #include "opencv2/core/cuda/vec_math.hpp"
 #include "opencv2/core/cuda/saturate_cast.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -191,6 +191,6 @@ namespace cv { namespace gpu { namespace cudev
         template void pyrUp_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
         template void pyrUp_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
     } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuwarping/src/cuda/remap.cu b/modules/gpuwarping/src/cuda/remap.cu
index c4ea317fbd..87c17e0301 100644
--- a/modules/gpuwarping/src/cuda/remap.cu
+++ b/modules/gpuwarping/src/cuda/remap.cu
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/filters.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -268,7 +268,7 @@ namespace cv { namespace gpu { namespace cudev
         template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
         template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
     } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuwarping/src/cuda/resize.cu b/modules/gpuwarping/src/cuda/resize.cu
index 037718980d..3f88a35e41 100644
--- a/modules/gpuwarping/src/cuda/resize.cu
+++ b/modules/gpuwarping/src/cuda/resize.cu
@@ -50,7 +50,7 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/filters.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     // kernels
 
diff --git a/modules/gpuwarping/src/cuda/warp.cu b/modules/gpuwarping/src/cuda/warp.cu
index 83db79ebf1..8432d967f2 100644
--- a/modules/gpuwarping/src/cuda/warp.cu
+++ b/modules/gpuwarping/src/cuda/warp.cu
@@ -49,7 +49,7 @@
 #include "opencv2/core/cuda/saturate_cast.hpp"
 #include "opencv2/core/cuda/filters.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -383,7 +383,7 @@ namespace cv { namespace gpu { namespace cudev
         template void warpPerspective_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
         template void warpPerspective_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
     } // namespace imgproc
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 
 #endif /* CUDA_DISABLER */
diff --git a/modules/gpuwarping/src/pyramids.cpp b/modules/gpuwarping/src/pyramids.cpp
index 0e8445df2c..54505d8573 100644
--- a/modules/gpuwarping/src/pyramids.cpp
+++ b/modules/gpuwarping/src/pyramids.cpp
@@ -43,21 +43,21 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::pyrDown(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::pyrUp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::pyrDown(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::pyrUp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-Ptr<ImagePyramid> cv::gpu::createImagePyramid(InputArray, int, Stream&) { throw_no_cuda(); return Ptr<ImagePyramid>(); }
+Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray, int, Stream&) { throw_no_cuda(); return Ptr<ImagePyramid>(); }
 
 #else // HAVE_CUDA
 
 //////////////////////////////////////////////////////////////////////////////
 // pyrDown
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -65,9 +65,9 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::pyrDown(InputArray _src, OutputArray _dst, Stream& stream)
+void cv::cuda::pyrDown(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    using namespace cv::gpu::cudev::imgproc;
+    using namespace cv::cuda::cudev::imgproc;
 
     typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
     static const func_t funcs[6][4] =
@@ -97,7 +97,7 @@ void cv::gpu::pyrDown(InputArray _src, OutputArray _dst, Stream& stream)
 //////////////////////////////////////////////////////////////////////////////
 // pyrUp
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -105,9 +105,9 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::pyrUp(InputArray _src, OutputArray _dst, Stream& stream)
+void cv::cuda::pyrUp(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    using namespace cv::gpu::cudev::imgproc;
+    using namespace cv::cuda::cudev::imgproc;
 
     typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
     static const func_t funcs[6][4] =
@@ -181,7 +181,7 @@ namespace
 
             const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];
 
-            cv::gpu::cudev::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream));
+            cv::cuda::cudev::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream));
 
             szLastLayer = szCurLayer;
         }
@@ -222,13 +222,13 @@ namespace
             lastLayer = curLayer;
         }
 
-        cv::gpu::cudev::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
+        cv::cuda::cudev::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
     }
 }
 
 #endif
 
-Ptr<ImagePyramid> cv::gpu::createImagePyramid(InputArray img, int nLayers, Stream& stream)
+Ptr<ImagePyramid> cv::cuda::createImagePyramid(InputArray img, int nLayers, Stream& stream)
 {
 #ifndef HAVE_OPENCV_GPULEGACY
     (void) img;
diff --git a/modules/gpuwarping/src/remap.cpp b/modules/gpuwarping/src/remap.cpp
index c3d797783a..897d3917c3 100644
--- a/modules/gpuwarping/src/remap.cpp
+++ b/modules/gpuwarping/src/remap.cpp
@@ -44,11 +44,11 @@
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::remap(InputArray, OutputArray, InputArray, InputArray, int, int, Scalar, Stream&){ throw_no_cuda(); }
+void cv::cuda::remap(InputArray, OutputArray, InputArray, InputArray, int, int, Scalar, Stream&){ throw_no_cuda(); }
 
 #else // HAVE_CUDA
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -58,9 +58,9 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputArray _ymap, int interpolation, int borderMode, Scalar borderValue, Stream& stream)
+void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputArray _ymap, int interpolation, int borderMode, Scalar borderValue, Stream& stream)
 {
-    using namespace cv::gpu::cudev::imgproc;
+    using namespace cv::cuda::cudev::imgproc;
 
     typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation,
         int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
diff --git a/modules/gpuwarping/src/resize.cpp b/modules/gpuwarping/src/resize.cpp
index 9c82318f9f..44c5c59c24 100644
--- a/modules/gpuwarping/src/resize.cpp
+++ b/modules/gpuwarping/src/resize.cpp
@@ -44,17 +44,17 @@
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::resize(InputArray, OutputArray, Size, double, double, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::resize(InputArray, OutputArray, Size, double, double, int, Stream&) { throw_no_cuda(); }
 
 #else // HAVE_CUDA
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <typename T>
     void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
 }}}
 
-void cv::gpu::resize(InputArray _src, OutputArray _dst, Size dsize, double fx, double fy, int interpolation, Stream& stream)
+void cv::cuda::resize(InputArray _src, OutputArray _dst, Size dsize, double fx, double fy, int interpolation, Stream& stream)
 {
     GpuMat src = _src.getGpuMat();
 
diff --git a/modules/gpuwarping/src/warp.cpp b/modules/gpuwarping/src/warp.cpp
index 2775fc0847..2dbe595500 100644
--- a/modules/gpuwarping/src/warp.cpp
+++ b/modules/gpuwarping/src/warp.cpp
@@ -43,25 +43,25 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
 
-void cv::gpu::warpAffine(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
-void cv::gpu::buildWarpAffineMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::warpAffine(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
+void cv::cuda::buildWarpAffineMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::warpPerspective(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
-void cv::gpu::buildWarpPerspectiveMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::warpPerspective(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
+void cv::cuda::buildWarpPerspectiveMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::buildWarpPlaneMaps(Size, Rect, InputArray, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::buildWarpCylindricalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
-void cv::gpu::buildWarpSphericalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::buildWarpPlaneMaps(Size, Rect, InputArray, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::buildWarpCylindricalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::buildWarpSphericalMaps(Size, Rect, InputArray, InputArray, float, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
 
-void cv::gpu::rotate(InputArray, OutputArray, Size, double, double, double, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::rotate(InputArray, OutputArray, Size, double, double, double, int, Stream&) { throw_no_cuda(); }
 
 #else // HAVE_CUDA
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -79,9 +79,9 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::buildWarpAffineMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
+void cv::cuda::buildWarpAffineMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
 {
-    using namespace cv::gpu::cudev::imgproc;
+    using namespace cv::cuda::cudev::imgproc;
 
     Mat M = _M.getMat();
 
@@ -108,9 +108,9 @@ void cv::gpu::buildWarpAffineMaps(InputArray _M, bool inverse, Size dsize, Outpu
     buildWarpAffineMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::buildWarpPerspectiveMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
+void cv::cuda::buildWarpPerspectiveMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
 {
-    using namespace cv::gpu::cudev::imgproc;
+    using namespace cv::cuda::cudev::imgproc;
 
     Mat M = _M.getMat();
 
@@ -152,7 +152,7 @@ namespace
     {
         typedef typename NppWarpFunc<DEPTH>::npp_type npp_type;
 
-        static void call(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream)
+        static void call(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream)
         {
             static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
 
@@ -172,7 +172,7 @@ namespace
             dstroi.height = dst.rows;
             dstroi.width = dst.cols;
 
-            cv::gpu::NppStreamHandler h(stream);
+            cv::cuda::NppStreamHandler h(stream);
 
             nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
                               dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi,
@@ -184,7 +184,7 @@ namespace
     };
 }
 
-void cv::gpu::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
+void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
 {
     GpuMat src = _src.getGpuMat();
     Mat M = _M.getMat();
@@ -250,7 +250,7 @@ void cv::gpu::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size
 
     if (useNpp)
     {
-        typedef void (*func_t)(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);
+        typedef void (*func_t)(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);
 
         static const func_t funcs[2][6][4] =
         {
@@ -285,7 +285,7 @@ void cv::gpu::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size
     }
     else
     {
-        using namespace cv::gpu::cudev::imgproc;
+        using namespace cv::cuda::cudev::imgproc;
 
         typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
             int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
@@ -323,7 +323,7 @@ void cv::gpu::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size
     }
 }
 
-void cv::gpu::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
+void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
 {
     GpuMat src = _src.getGpuMat();
     Mat M = _M.getMat();
@@ -389,7 +389,7 @@ void cv::gpu::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M,
 
     if (useNpp)
     {
-        typedef void (*func_t)(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);
+        typedef void (*func_t)(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);
 
         static const func_t funcs[2][6][4] =
         {
@@ -424,7 +424,7 @@ void cv::gpu::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M,
     }
     else
     {
-        using namespace cv::gpu::cudev::imgproc;
+        using namespace cv::cuda::cudev::imgproc;
 
         typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
             int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
@@ -465,7 +465,7 @@ void cv::gpu::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M,
 //////////////////////////////////////////////////////////////////////////////
 // buildWarpPlaneMaps
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -475,7 +475,7 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, InputArray _T,
+void cv::cuda::buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, InputArray _T,
                                  float scale, OutputArray _map_x, OutputArray _map_y, Stream& stream)
 {
     (void) src_size;
@@ -506,7 +506,7 @@ void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, InputArray _K, Inp
 //////////////////////////////////////////////////////////////////////////////
 // buildWarpCylyndricalMaps
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -516,7 +516,7 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
+void cv::cuda::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
                                        OutputArray _map_x, OutputArray _map_y, Stream& stream)
 {
     (void) src_size;
@@ -545,7 +545,7 @@ void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, InputArray _
 //////////////////////////////////////////////////////////////////////////////
 // buildWarpSphericalMaps
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -555,7 +555,7 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
+void cv::cuda::buildWarpSphericalMaps(Size src_size, Rect dst_roi, InputArray _K, InputArray _R, float scale,
                                      OutputArray _map_x, OutputArray _map_y, Stream& stream)
 {
     (void) src_size;
@@ -626,7 +626,7 @@ namespace
     };
 }
 
-void cv::gpu::rotate(InputArray _src, OutputArray _dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
+void cv::cuda::rotate(InputArray _src, OutputArray _dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream);
     static const func_t funcs[6][4] =
diff --git a/modules/gpuwarping/test/test_pyramids.cpp b/modules/gpuwarping/test/test_pyramids.cpp
index f296b7d6f2..6b45f801d3 100644
--- a/modules/gpuwarping/test/test_pyramids.cpp
+++ b/modules/gpuwarping/test/test_pyramids.cpp
@@ -49,9 +49,9 @@ using namespace cvtest;
 ////////////////////////////////////////////////////////
 // pyrDown
 
-PARAM_TEST_CASE(PyrDown, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
+PARAM_TEST_CASE(PyrDown, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     bool useRoi;
@@ -63,7 +63,7 @@ PARAM_TEST_CASE(PyrDown, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
         type = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -71,8 +71,8 @@ GPU_TEST_P(PyrDown, Accuracy)
 {
     cv::Mat src = randomMat(size, type);
 
-    cv::gpu::GpuMat dst = createMat(cv::Size((size.width + 1) / 2, (size.height + 1) / 2), type, useRoi);
-    cv::gpu::pyrDown(loadMat(src, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(cv::Size((size.width + 1) / 2, (size.height + 1) / 2), type, useRoi);
+    cv::cuda::pyrDown(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
     cv::pyrDown(src, dst_gold);
@@ -89,9 +89,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Warping, PyrDown, testing::Combine(
 ////////////////////////////////////////////////////////
 // pyrUp
 
-PARAM_TEST_CASE(PyrUp, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
+PARAM_TEST_CASE(PyrUp, cv::cuda::DeviceInfo, cv::Size, MatType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     bool useRoi;
@@ -103,7 +103,7 @@ PARAM_TEST_CASE(PyrUp, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
         type = GET_PARAM(2);
         useRoi = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -111,8 +111,8 @@ GPU_TEST_P(PyrUp, Accuracy)
 {
     cv::Mat src = randomMat(size, type);
 
-    cv::gpu::GpuMat dst = createMat(cv::Size(size.width * 2, size.height * 2), type, useRoi);
-    cv::gpu::pyrUp(loadMat(src, useRoi), dst);
+    cv::cuda::GpuMat dst = createMat(cv::Size(size.width * 2, size.height * 2), type, useRoi);
+    cv::cuda::pyrUp(loadMat(src, useRoi), dst);
 
     cv::Mat dst_gold;
     cv::pyrUp(src, dst_gold);
diff --git a/modules/gpuwarping/test/test_remap.cpp b/modules/gpuwarping/test/test_remap.cpp
index c1899ff611..a914b0f137 100644
--- a/modules/gpuwarping/test/test_remap.cpp
+++ b/modules/gpuwarping/test/test_remap.cpp
@@ -112,9 +112,9 @@ namespace
 ///////////////////////////////////////////////////////////////////
 // Test
 
-PARAM_TEST_CASE(Remap, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, BorderType, UseRoi)
+PARAM_TEST_CASE(Remap, cv::cuda::DeviceInfo, cv::Size, MatType, Interpolation, BorderType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     int interpolation;
@@ -133,7 +133,7 @@ PARAM_TEST_CASE(Remap, cv::gpu::DeviceInfo, cv::Size, MatType, Interpolation, Bo
         borderType = GET_PARAM(4);
         useRoi = GET_PARAM(5);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
 
         // rotation matrix
 
@@ -160,8 +160,8 @@ GPU_TEST_P(Remap, Accuracy)
     cv::Mat src = randomMat(size, type);
     cv::Scalar val = randomScalar(0.0, 255.0);
 
-    cv::gpu::GpuMat dst = createMat(xmap.size(), type, useRoi);
-    cv::gpu::remap(loadMat(src, useRoi), dst, loadMat(xmap, useRoi), loadMat(ymap, useRoi), interpolation, borderType, val);
+    cv::cuda::GpuMat dst = createMat(xmap.size(), type, useRoi);
+    cv::cuda::remap(loadMat(src, useRoi), dst, loadMat(xmap, useRoi), loadMat(ymap, useRoi), interpolation, borderType, val);
 
     cv::Mat dst_gold;
     remapGold(src, xmap, ymap, dst_gold, interpolation, borderType, val);
diff --git a/modules/gpuwarping/test/test_resize.cpp b/modules/gpuwarping/test/test_resize.cpp
index 13326c07bb..16260e76ba 100644
--- a/modules/gpuwarping/test/test_resize.cpp
+++ b/modules/gpuwarping/test/test_resize.cpp
@@ -117,9 +117,9 @@ namespace
 ///////////////////////////////////////////////////////////////////
 // Test
 
-PARAM_TEST_CASE(Resize, cv::gpu::DeviceInfo, cv::Size, MatType, double, Interpolation, UseRoi)
+PARAM_TEST_CASE(Resize, cv::cuda::DeviceInfo, cv::Size, MatType, double, Interpolation, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     double coeff;
     int interpolation;
@@ -135,7 +135,7 @@ PARAM_TEST_CASE(Resize, cv::gpu::DeviceInfo, cv::Size, MatType, double, Interpol
         interpolation = GET_PARAM(4);
         useRoi = GET_PARAM(5);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -143,8 +143,8 @@ GPU_TEST_P(Resize, Accuracy)
 {
     cv::Mat src = randomMat(size, type);
 
-    cv::gpu::GpuMat dst = createMat(cv::Size(cv::saturate_cast<int>(src.cols * coeff), cv::saturate_cast<int>(src.rows * coeff)), type, useRoi);
-    cv::gpu::resize(loadMat(src, useRoi), dst, cv::Size(), coeff, coeff, interpolation);
+    cv::cuda::GpuMat dst = createMat(cv::Size(cv::saturate_cast<int>(src.cols * coeff), cv::saturate_cast<int>(src.rows * coeff)), type, useRoi);
+    cv::cuda::resize(loadMat(src, useRoi), dst, cv::Size(), coeff, coeff, interpolation);
 
     cv::Mat dst_gold;
     resizeGold(src, dst_gold, coeff, coeff, interpolation);
@@ -162,9 +162,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Warping, Resize, testing::Combine(
 
 /////////////////
 
-PARAM_TEST_CASE(ResizeSameAsHost, cv::gpu::DeviceInfo, cv::Size, MatType, double, Interpolation, UseRoi)
+PARAM_TEST_CASE(ResizeSameAsHost, cv::cuda::DeviceInfo, cv::Size, MatType, double, Interpolation, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     double coeff;
     int interpolation;
@@ -180,7 +180,7 @@ PARAM_TEST_CASE(ResizeSameAsHost, cv::gpu::DeviceInfo, cv::Size, MatType, double
         interpolation = GET_PARAM(4);
         useRoi = GET_PARAM(5);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -189,8 +189,8 @@ GPU_TEST_P(ResizeSameAsHost, Accuracy)
 {
     cv::Mat src = randomMat(size, type);
 
-    cv::gpu::GpuMat dst = createMat(cv::Size(cv::saturate_cast<int>(src.cols * coeff), cv::saturate_cast<int>(src.rows * coeff)), type, useRoi);
-    cv::gpu::resize(loadMat(src, useRoi), dst, cv::Size(), coeff, coeff, interpolation);
+    cv::cuda::GpuMat dst = createMat(cv::Size(cv::saturate_cast<int>(src.cols * coeff), cv::saturate_cast<int>(src.rows * coeff)), type, useRoi);
+    cv::cuda::resize(loadMat(src, useRoi), dst, cv::Size(), coeff, coeff, interpolation);
 
     cv::Mat dst_gold;
     cv::resize(src, dst_gold, cv::Size(), coeff, coeff, interpolation);
diff --git a/modules/gpuwarping/test/test_warp_affine.cpp b/modules/gpuwarping/test/test_warp_affine.cpp
index 206446c4d0..290f8e4402 100644
--- a/modules/gpuwarping/test/test_warp_affine.cpp
+++ b/modules/gpuwarping/test/test_warp_affine.cpp
@@ -62,9 +62,9 @@ namespace
 ///////////////////////////////////////////////////////////////////
 // Test buildWarpAffineMaps
 
-PARAM_TEST_CASE(BuildWarpAffineMaps, cv::gpu::DeviceInfo, cv::Size, Inverse)
+PARAM_TEST_CASE(BuildWarpAffineMaps, cv::cuda::DeviceInfo, cv::Size, Inverse)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     bool inverse;
 
@@ -74,7 +74,7 @@ PARAM_TEST_CASE(BuildWarpAffineMaps, cv::gpu::DeviceInfo, cv::Size, Inverse)
         size = GET_PARAM(1);
         inverse = GET_PARAM(2);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -83,8 +83,8 @@ GPU_TEST_P(BuildWarpAffineMaps, Accuracy)
     cv::Mat M = createTransfomMatrix(size, CV_PI / 4);
     cv::Mat src = randomMat(randomSize(200, 400), CV_8UC1);
 
-    cv::gpu::GpuMat xmap, ymap;
-    cv::gpu::buildWarpAffineMaps(M, inverse, size, xmap, ymap);
+    cv::cuda::GpuMat xmap, ymap;
+    cv::cuda::buildWarpAffineMaps(M, inverse, size, xmap, ymap);
 
     int interpolation = cv::INTER_NEAREST;
     int borderMode = cv::BORDER_CONSTANT;
@@ -180,9 +180,9 @@ namespace
 ///////////////////////////////////////////////////////////////////
 // Test
 
-PARAM_TEST_CASE(WarpAffine, cv::gpu::DeviceInfo, cv::Size, MatType, Inverse, Interpolation, BorderType, UseRoi)
+PARAM_TEST_CASE(WarpAffine, cv::cuda::DeviceInfo, cv::Size, MatType, Inverse, Interpolation, BorderType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     bool inverse;
@@ -200,7 +200,7 @@ PARAM_TEST_CASE(WarpAffine, cv::gpu::DeviceInfo, cv::Size, MatType, Inverse, Int
         borderType = GET_PARAM(5);
         useRoi = GET_PARAM(6);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -213,8 +213,8 @@ GPU_TEST_P(WarpAffine, Accuracy)
         flags |= cv::WARP_INVERSE_MAP;
     cv::Scalar val = randomScalar(0.0, 255.0);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-    cv::gpu::warpAffine(loadMat(src, useRoi), dst, M, size, flags, borderType, val);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::warpAffine(loadMat(src, useRoi), dst, M, size, flags, borderType, val);
 
     cv::Mat dst_gold;
     warpAffineGold(src, M, inverse, size, dst_gold, interpolation, borderType, val);
@@ -234,9 +234,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Warping, WarpAffine, testing::Combine(
 ///////////////////////////////////////////////////////////////////
 // Test NPP
 
-PARAM_TEST_CASE(WarpAffineNPP, cv::gpu::DeviceInfo, MatType, Inverse, Interpolation)
+PARAM_TEST_CASE(WarpAffineNPP, cv::cuda::DeviceInfo, MatType, Inverse, Interpolation)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     int type;
     bool inverse;
     int interpolation;
@@ -248,7 +248,7 @@ PARAM_TEST_CASE(WarpAffineNPP, cv::gpu::DeviceInfo, MatType, Inverse, Interpolat
         inverse = GET_PARAM(2);
         interpolation = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -262,8 +262,8 @@ GPU_TEST_P(WarpAffineNPP, Accuracy)
     if (inverse)
         flags |= cv::WARP_INVERSE_MAP;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::warpAffine(loadMat(src), dst, M, src.size(), flags);
+    cv::cuda::GpuMat dst;
+    cv::cuda::warpAffine(loadMat(src), dst, M, src.size(), flags);
 
     cv::Mat dst_gold;
     warpAffineGold(src, M, inverse, src.size(), dst_gold, interpolation, cv::BORDER_CONSTANT, cv::Scalar::all(0));
diff --git a/modules/gpuwarping/test/test_warp_perspective.cpp b/modules/gpuwarping/test/test_warp_perspective.cpp
index 49f844c3f6..1b9b1e53ea 100644
--- a/modules/gpuwarping/test/test_warp_perspective.cpp
+++ b/modules/gpuwarping/test/test_warp_perspective.cpp
@@ -63,9 +63,9 @@ namespace
 ///////////////////////////////////////////////////////////////////
 // Test buildWarpPerspectiveMaps
 
-PARAM_TEST_CASE(BuildWarpPerspectiveMaps, cv::gpu::DeviceInfo, cv::Size, Inverse)
+PARAM_TEST_CASE(BuildWarpPerspectiveMaps, cv::cuda::DeviceInfo, cv::Size, Inverse)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     bool inverse;
 
@@ -75,7 +75,7 @@ PARAM_TEST_CASE(BuildWarpPerspectiveMaps, cv::gpu::DeviceInfo, cv::Size, Inverse
         size = GET_PARAM(1);
         inverse = GET_PARAM(2);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -83,8 +83,8 @@ GPU_TEST_P(BuildWarpPerspectiveMaps, Accuracy)
 {
     cv::Mat M = createTransfomMatrix(size, CV_PI / 4);
 
-    cv::gpu::GpuMat xmap, ymap;
-    cv::gpu::buildWarpPerspectiveMaps(M, inverse, size, xmap, ymap);
+    cv::cuda::GpuMat xmap, ymap;
+    cv::cuda::buildWarpPerspectiveMaps(M, inverse, size, xmap, ymap);
 
     cv::Mat src = randomMat(randomSize(200, 400), CV_8UC1);
     int interpolation = cv::INTER_NEAREST;
@@ -183,9 +183,9 @@ namespace
 ///////////////////////////////////////////////////////////////////
 // Test
 
-PARAM_TEST_CASE(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, MatType, Inverse, Interpolation, BorderType, UseRoi)
+PARAM_TEST_CASE(WarpPerspective, cv::cuda::DeviceInfo, cv::Size, MatType, Inverse, Interpolation, BorderType, UseRoi)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     cv::Size size;
     int type;
     bool inverse;
@@ -203,7 +203,7 @@ PARAM_TEST_CASE(WarpPerspective, cv::gpu::DeviceInfo, cv::Size, MatType, Inverse
         borderType = GET_PARAM(5);
         useRoi = GET_PARAM(6);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -216,8 +216,8 @@ GPU_TEST_P(WarpPerspective, Accuracy)
         flags |= cv::WARP_INVERSE_MAP;
     cv::Scalar val = randomScalar(0.0, 255.0);
 
-    cv::gpu::GpuMat dst = createMat(size, type, useRoi);
-    cv::gpu::warpPerspective(loadMat(src, useRoi), dst, M, size, flags, borderType, val);
+    cv::cuda::GpuMat dst = createMat(size, type, useRoi);
+    cv::cuda::warpPerspective(loadMat(src, useRoi), dst, M, size, flags, borderType, val);
 
     cv::Mat dst_gold;
     warpPerspectiveGold(src, M, inverse, size, dst_gold, interpolation, borderType, val);
@@ -237,9 +237,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Warping, WarpPerspective, testing::Combine(
 ///////////////////////////////////////////////////////////////////
 // Test NPP
 
-PARAM_TEST_CASE(WarpPerspectiveNPP, cv::gpu::DeviceInfo, MatType, Inverse, Interpolation)
+PARAM_TEST_CASE(WarpPerspectiveNPP, cv::cuda::DeviceInfo, MatType, Inverse, Interpolation)
 {
-    cv::gpu::DeviceInfo devInfo;
+    cv::cuda::DeviceInfo devInfo;
     int type;
     bool inverse;
     int interpolation;
@@ -251,7 +251,7 @@ PARAM_TEST_CASE(WarpPerspectiveNPP, cv::gpu::DeviceInfo, MatType, Inverse, Inter
         inverse = GET_PARAM(2);
         interpolation = GET_PARAM(3);
 
-        cv::gpu::setDevice(devInfo.deviceID());
+        cv::cuda::setDevice(devInfo.deviceID());
     }
 };
 
@@ -265,8 +265,8 @@ GPU_TEST_P(WarpPerspectiveNPP, Accuracy)
     if (inverse)
         flags |= cv::WARP_INVERSE_MAP;
 
-    cv::gpu::GpuMat dst;
-    cv::gpu::warpPerspective(loadMat(src), dst, M, src.size(), flags);
+    cv::cuda::GpuMat dst;
+    cv::cuda::warpPerspective(loadMat(src), dst, M, src.size(), flags);
 
     cv::Mat dst_gold;
     warpPerspectiveGold(src, M, inverse, src.size(), dst_gold, interpolation, cv::BORDER_CONSTANT, cv::Scalar::all(0));
diff --git a/modules/nonfree/include/opencv2/nonfree/gpu.hpp b/modules/nonfree/include/opencv2/nonfree/gpu.hpp
index b94a99cf72..c900fc3ea3 100644
--- a/modules/nonfree/include/opencv2/nonfree/gpu.hpp
+++ b/modules/nonfree/include/opencv2/nonfree/gpu.hpp
@@ -45,7 +45,7 @@
 
 #include "opencv2/core/gpu.hpp"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 class CV_EXPORTS SURF_GPU
 {
@@ -121,6 +121,6 @@ public:
     GpuMat maxPosBuffer;
 };
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 #endif // __OPENCV_NONFREE_GPU_HPP__
diff --git a/modules/nonfree/perf/perf_gpu.cpp b/modules/nonfree/perf/perf_gpu.cpp
index 95708227ae..c2a8ea130d 100644
--- a/modules/nonfree/perf/perf_gpu.cpp
+++ b/modules/nonfree/perf/perf_gpu.cpp
@@ -67,12 +67,12 @@ PERF_TEST_P(Image, GPU_SURF,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::SURF_GPU d_surf;
+        cv::cuda::SURF_GPU d_surf;
 
-        const cv::gpu::GpuMat d_img(img);
-        cv::gpu::GpuMat d_keypoints, d_descriptors;
+        const cv::cuda::GpuMat d_img(img);
+        cv::cuda::GpuMat d_keypoints, d_descriptors;
 
-        TEST_CYCLE() d_surf(d_img, cv::gpu::GpuMat(), d_keypoints, d_descriptors);
+        TEST_CYCLE() d_surf(d_img, cv::cuda::GpuMat(), d_keypoints, d_descriptors);
 
         std::vector<cv::KeyPoint> gpu_keypoints;
         d_surf.downloadKeypoints(d_keypoints, gpu_keypoints);
diff --git a/modules/nonfree/src/cuda/surf.cu b/modules/nonfree/src/cuda/surf.cu
index cdd54e4caf..e2fd9656cf 100644
--- a/modules/nonfree/src/cuda/surf.cu
+++ b/modules/nonfree/src/cuda/surf.cu
@@ -52,7 +52,7 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "opencv2/core/cuda/filters.hpp"
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace surf
     {
@@ -79,7 +79,7 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace surf
     {
@@ -955,6 +955,6 @@ namespace cv { namespace gpu { namespace cudev
             }
         }
     } // namespace surf
-}}} // namespace cv { namespace gpu { namespace cudev
+}}} // namespace cv { namespace cuda { namespace cudev
 
 #endif // HAVE_OPENCV_GPUARITHM
diff --git a/modules/nonfree/src/surf_gpu.cpp b/modules/nonfree/src/surf_gpu.cpp
index 35805470b2..21aa12f063 100644
--- a/modules/nonfree/src/surf_gpu.cpp
+++ b/modules/nonfree/src/surf_gpu.cpp
@@ -43,26 +43,26 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_GPUARITHM)
 
-cv::gpu::SURF_GPU::SURF_GPU() { throw_no_cuda(); }
-cv::gpu::SURF_GPU::SURF_GPU(double, int, int, bool, float, bool) { throw_no_cuda(); }
-int cv::gpu::SURF_GPU::descriptorSize() const { throw_no_cuda(); return 0;}
-void cv::gpu::SURF_GPU::uploadKeypoints(const std::vector<KeyPoint>&, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::gpu::SURF_GPU::downloadDescriptors(const GpuMat&, std::vector<float>&) { throw_no_cuda(); }
-void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool) { throw_no_cuda(); }
-void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, GpuMat&, bool) { throw_no_cuda(); }
-void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, std::vector<float>&, bool) { throw_no_cuda(); }
-void cv::gpu::SURF_GPU::releaseMemory() { throw_no_cuda(); }
+cv::cuda::SURF_GPU::SURF_GPU() { throw_no_cuda(); }
+cv::cuda::SURF_GPU::SURF_GPU(double, int, int, bool, float, bool) { throw_no_cuda(); }
+int cv::cuda::SURF_GPU::descriptorSize() const { throw_no_cuda(); return 0;}
+void cv::cuda::SURF_GPU::uploadKeypoints(const std::vector<KeyPoint>&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::SURF_GPU::downloadKeypoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
+void cv::cuda::SURF_GPU::downloadDescriptors(const GpuMat&, std::vector<float>&) { throw_no_cuda(); }
+void cv::cuda::SURF_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::SURF_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool) { throw_no_cuda(); }
+void cv::cuda::SURF_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
+void cv::cuda::SURF_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, GpuMat&, bool) { throw_no_cuda(); }
+void cv::cuda::SURF_GPU::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, std::vector<float>&, bool) { throw_no_cuda(); }
+void cv::cuda::SURF_GPU::releaseMemory() { throw_no_cuda(); }
 
 #else // !defined (HAVE_CUDA)
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace surf
     {
@@ -89,7 +89,7 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-using namespace ::cv::gpu::cudev::surf;
+using namespace ::cv::cuda::cudev::surf;
 
 namespace
 {
@@ -142,13 +142,13 @@ namespace
 
             bindImgTex(img);
 
-            gpu::integral(img, surf_.sum, surf_.intBuffer);
+            cuda::integral(img, surf_.sum, surf_.intBuffer);
             sumOffset = bindSumTex(surf_.sum);
 
             if (use_mask)
             {
-                gpu::min(mask, 1.0, surf_.mask1);
-                gpu::integral(surf_.mask1, surf_.maskSum, surf_.intBuffer);
+                cuda::min(mask, 1.0, surf_.mask1);
+                cuda::integral(surf_.mask1, surf_.maskSum, surf_.intBuffer);
                 maskOffset = bindMaskSumTex(surf_.maskSum);
             }
         }
@@ -240,7 +240,7 @@ namespace
     };
 }
 
-cv::gpu::SURF_GPU::SURF_GPU()
+cv::cuda::SURF_GPU::SURF_GPU()
 {
     hessianThreshold = 100;
     extended = true;
@@ -250,7 +250,7 @@ cv::gpu::SURF_GPU::SURF_GPU()
     upright = false;
 }
 
-cv::gpu::SURF_GPU::SURF_GPU(double _threshold, int _nOctaves, int _nOctaveLayers, bool _extended, float _keypointsRatio, bool _upright)
+cv::cuda::SURF_GPU::SURF_GPU(double _threshold, int _nOctaves, int _nOctaveLayers, bool _extended, float _keypointsRatio, bool _upright)
 {
     hessianThreshold = _threshold;
     extended = _extended;
@@ -260,12 +260,12 @@ cv::gpu::SURF_GPU::SURF_GPU(double _threshold, int _nOctaves, int _nOctaveLayers
     upright = _upright;
 }
 
-int cv::gpu::SURF_GPU::descriptorSize() const
+int cv::cuda::SURF_GPU::descriptorSize() const
 {
     return extended ? 128 : 64;
 }
 
-void cv::gpu::SURF_GPU::uploadKeypoints(const std::vector<KeyPoint>& keypoints, GpuMat& keypointsGPU)
+void cv::cuda::SURF_GPU::uploadKeypoints(const std::vector<KeyPoint>& keypoints, GpuMat& keypointsGPU)
 {
     if (keypoints.empty())
         keypointsGPU.release();
@@ -297,7 +297,7 @@ void cv::gpu::SURF_GPU::uploadKeypoints(const std::vector<KeyPoint>& keypoints,
     }
 }
 
-void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, std::vector<KeyPoint>& keypoints)
+void cv::cuda::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, std::vector<KeyPoint>& keypoints)
 {
     const int nFeatures = keypointsGPU.cols;
 
@@ -333,7 +333,7 @@ void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, std::vecto
     }
 }
 
-void cv::gpu::SURF_GPU::downloadDescriptors(const GpuMat& descriptorsGPU, std::vector<float>& descriptors)
+void cv::cuda::SURF_GPU::downloadDescriptors(const GpuMat& descriptorsGPU, std::vector<float>& descriptors)
 {
     if (descriptorsGPU.empty())
         descriptors.clear();
@@ -347,7 +347,7 @@ void cv::gpu::SURF_GPU::downloadDescriptors(const GpuMat& descriptorsGPU, std::v
     }
 }
 
-void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
+void cv::cuda::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
 {
     if (!img.empty())
     {
@@ -357,7 +357,7 @@ void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, GpuMat
     }
 }
 
-void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
+void cv::cuda::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
                                    bool useProvidedKeypoints)
 {
     if (!img.empty())
@@ -375,7 +375,7 @@ void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, GpuMat
     }
 }
 
-void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
+void cv::cuda::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
 {
     GpuMat keypointsGPU;
 
@@ -384,7 +384,7 @@ void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, std::v
     downloadKeypoints(keypointsGPU, keypoints);
 }
 
-void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
+void cv::cuda::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
     GpuMat& descriptors, bool useProvidedKeypoints)
 {
     GpuMat keypointsGPU;
@@ -397,7 +397,7 @@ void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, std::v
     downloadKeypoints(keypointsGPU, keypoints);
 }
 
-void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
+void cv::cuda::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
     std::vector<float>& descriptors, bool useProvidedKeypoints)
 {
     GpuMat descriptorsGPU;
@@ -407,7 +407,7 @@ void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, std::v
     downloadDescriptors(descriptorsGPU, descriptors);
 }
 
-void cv::gpu::SURF_GPU::releaseMemory()
+void cv::cuda::SURF_GPU::releaseMemory()
 {
     sum.release();
     mask1.release();
diff --git a/modules/nonfree/test/test_gpu.cpp b/modules/nonfree/test/test_gpu.cpp
index 2836d6b116..7094a49d16 100644
--- a/modules/nonfree/test/test_gpu.cpp
+++ b/modules/nonfree/test/test_gpu.cpp
@@ -83,7 +83,7 @@ GPU_TEST_P(SURF, Detector)
     cv::Mat image = readImage("../gpu/features2d/aloe.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(image.empty());
 
-    cv::gpu::SURF_GPU surf;
+    cv::cuda::SURF_GPU surf;
     surf.hessianThreshold = hessianThreshold;
     surf.nOctaves = nOctaves;
     surf.nOctaveLayers = nOctaveLayers;
@@ -92,7 +92,7 @@ GPU_TEST_P(SURF, Detector)
     surf.keypointsRatio = 0.05f;
 
     std::vector<cv::KeyPoint> keypoints;
-    surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
+    surf(loadMat(image), cv::cuda::GpuMat(), keypoints);
 
     cv::SURF surf_gold;
     surf_gold.hessianThreshold = hessianThreshold;
@@ -119,7 +119,7 @@ GPU_TEST_P(SURF, Detector_Masked)
     cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
     mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
 
-    cv::gpu::SURF_GPU surf;
+    cv::cuda::SURF_GPU surf;
     surf.hessianThreshold = hessianThreshold;
     surf.nOctaves = nOctaves;
     surf.nOctaveLayers = nOctaveLayers;
@@ -152,7 +152,7 @@ GPU_TEST_P(SURF, Descriptor)
     cv::Mat image = readImage("../gpu/features2d/aloe.png", cv::IMREAD_GRAYSCALE);
     ASSERT_FALSE(image.empty());
 
-    cv::gpu::SURF_GPU surf;
+    cv::cuda::SURF_GPU surf;
     surf.hessianThreshold = hessianThreshold;
     surf.nOctaves = nOctaves;
     surf.nOctaveLayers = nOctaveLayers;
@@ -170,8 +170,8 @@ GPU_TEST_P(SURF, Descriptor)
     std::vector<cv::KeyPoint> keypoints;
     surf_gold(image, cv::noArray(), keypoints);
 
-    cv::gpu::GpuMat descriptors;
-    surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true);
+    cv::cuda::GpuMat descriptors;
+    surf(loadMat(image), cv::cuda::GpuMat(), keypoints, descriptors, true);
 
     cv::Mat descriptors_gold;
     surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
diff --git a/modules/photo/include/opencv2/photo/gpu.hpp b/modules/photo/include/opencv2/photo/gpu.hpp
index a8b3859e75..596b4b6095 100644
--- a/modules/photo/include/opencv2/photo/gpu.hpp
+++ b/modules/photo/include/opencv2/photo/gpu.hpp
@@ -45,7 +45,7 @@
 
 #include "opencv2/core/gpu.hpp"
 
-namespace cv { namespace gpu {
+namespace cv { namespace cuda {
 
 //! Brute force non-local means algorith (slow but universal)
 CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null());
@@ -66,6 +66,6 @@ private:
     GpuMat lab, l, ab;
 };
 
-}} // namespace cv { namespace gpu {
+}} // namespace cv { namespace cuda {
 
 #endif /* __OPENCV_PHOTO_GPU_HPP__ */
diff --git a/modules/photo/perf/perf_gpu.cpp b/modules/photo/perf/perf_gpu.cpp
index ec62f7a0cc..7e9b7c09c7 100644
--- a/modules/photo/perf/perf_gpu.cpp
+++ b/modules/photo/perf/perf_gpu.cpp
@@ -85,10 +85,10 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, GPU_NonLocalMeans,
 
     if (PERF_RUN_GPU())
     {
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::gpu::nonLocalMeans(d_src, dst, h, search_widow_size, block_size, borderMode);
+        TEST_CYCLE() cv::cuda::nonLocalMeans(d_src, dst, h, search_widow_size, block_size, borderMode);
 
         GPU_SANITY_CHECK(dst);
     }
@@ -126,10 +126,10 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, GPU_FastNonLocalMeans,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::FastNonLocalMeansDenoising fnlmd;
+        cv::cuda::FastNonLocalMeansDenoising fnlmd;
 
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
         TEST_CYCLE() fnlmd.simpleMethod(d_src, dst, h, search_widow_size, block_size);
 
@@ -171,10 +171,10 @@ PERF_TEST_P(Sz_Depth_WinSz_BlockSz, GPU_FastNonLocalMeansColored,
 
     if (PERF_RUN_GPU())
     {
-        cv::gpu::FastNonLocalMeansDenoising fnlmd;
+        cv::cuda::FastNonLocalMeansDenoising fnlmd;
 
-        const cv::gpu::GpuMat d_src(src);
-        cv::gpu::GpuMat dst;
+        const cv::cuda::GpuMat d_src(src);
+        cv::cuda::GpuMat dst;
 
         TEST_CYCLE() fnlmd.labMethod(d_src, dst, h, h, search_widow_size, block_size);
 
diff --git a/modules/photo/src/cuda/nlm.cu b/modules/photo/src/cuda/nlm.cu
index 44ed4dc6b9..4fd09678bf 100644
--- a/modules/photo/src/cuda/nlm.cu
+++ b/modules/photo/src/cuda/nlm.cu
@@ -47,7 +47,7 @@
 #include "opencv2/core/cuda/reduce.hpp"
 #include "opencv2/core/cuda/border_interpolate.hpp"
 
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 typedef unsigned char uchar;
 typedef unsigned short ushort;
@@ -55,7 +55,7 @@ typedef unsigned short ushort;
 //////////////////////////////////////////////////////////////////////////////////
 //// Non Local Means Denosing
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -177,7 +177,7 @@ namespace cv { namespace gpu { namespace cudev
 //////////////////////////////////////////////////////////////////////////////////
 //// Non Local Means Denosing (fast approximate version)
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -188,7 +188,7 @@ namespace cv { namespace gpu { namespace cudev
             template <int BLOCK_SIZE>
             static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*> smem_tuple(float* smem)
             {
-                return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE);
+                return cv::cuda::cudev::smem_tuple(smem, smem + BLOCK_SIZE);
             }
 
             static __device__ __forceinline__ thrust::tuple<float&, float&> tie(float& val1, float& val2)
@@ -207,7 +207,7 @@ namespace cv { namespace gpu { namespace cudev
             template <int BLOCK_SIZE>
             static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
             {
-                return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
+                return cv::cuda::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
             }
 
             static __device__ __forceinline__ thrust::tuple<float&, float&, float&> tie(float& val1, float2& val2)
@@ -226,7 +226,7 @@ namespace cv { namespace gpu { namespace cudev
             template <int BLOCK_SIZE>
             static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
             {
-                return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
+                return cv::cuda::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
             }
 
             static __device__ __forceinline__ thrust::tuple<float&, float&, float&, float&> tie(float& val1, float3& val2)
@@ -245,7 +245,7 @@ namespace cv { namespace gpu { namespace cudev
             template <int BLOCK_SIZE>
             static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
             {
-                return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE, smem + 4 * BLOCK_SIZE);
+                return cv::cuda::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE, smem + 4 * BLOCK_SIZE);
             }
 
             static __device__ __forceinline__ thrust::tuple<float&, float&, float&, float&, float&> tie(float& val1, float4& val2)
diff --git a/modules/photo/src/denoising_gpu.cpp b/modules/photo/src/denoising_gpu.cpp
index 2c1fabe14a..ea0fed9291 100644
--- a/modules/photo/src/denoising_gpu.cpp
+++ b/modules/photo/src/denoising_gpu.cpp
@@ -56,20 +56,20 @@
 #endif
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #if !defined (HAVE_CUDA) || !defined(HAVE_OPENCV_GPUARITHM) || !defined(HAVE_OPENCV_GPUIMGPROC)
 
-void cv::gpu::nonLocalMeans(const GpuMat&, GpuMat&, float, int, int, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat&, GpuMat&, float, int, int, Stream&) { throw_no_cuda(); }
-void cv::gpu::FastNonLocalMeansDenoising::labMethod( const GpuMat&, GpuMat&, float, float, int, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::nonLocalMeans(const GpuMat&, GpuMat&, float, int, int, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::FastNonLocalMeansDenoising::simpleMethod(const GpuMat&, GpuMat&, float, int, int, Stream&) { throw_no_cuda(); }
+void cv::cuda::FastNonLocalMeansDenoising::labMethod( const GpuMat&, GpuMat&, float, float, int, int, Stream&) { throw_no_cuda(); }
 
 #else
 
 //////////////////////////////////////////////////////////////////////////////////
 //// Non Local Means Denosing (brute force)
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -78,9 +78,9 @@ namespace cv { namespace gpu { namespace cudev
     }
 }}}
 
-void cv::gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, int borderMode, Stream& s)
+void cv::cuda::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, int borderMode, Stream& s)
 {
-    using cv::gpu::cudev::imgproc::nlm_bruteforce_gpu;
+    using cv::cuda::cudev::imgproc::nlm_bruteforce_gpu;
     typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream);
 
     static const func_t funcs[4] = { nlm_bruteforce_gpu<uchar>, nlm_bruteforce_gpu<uchar2>, nlm_bruteforce_gpu<uchar3>, 0/*nlm_bruteforce_gpu<uchar4>,*/ };
@@ -97,7 +97,7 @@ void cv::gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_
     func(src, dst, search_window/2, block_window/2, h, borderMode, StreamAccessor::getStream(s));
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     namespace imgproc
     {
@@ -112,24 +112,24 @@ namespace cv { namespace gpu { namespace cudev
      }
 }}}
 
-void cv::gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, Stream& s)
+void cv::cuda::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window, int block_window, Stream& s)
 {
     CV_Assert(src.depth() == CV_8U && src.channels() < 4);
 
     int border_size = search_window/2 + block_window/2;
     Size esize = src.size() + Size(border_size, border_size) * 2;
 
-    cv::gpu::ensureSizeIsEnough(esize, CV_8UC3, extended_src_buffer);
+    cv::cuda::ensureSizeIsEnough(esize, CV_8UC3, extended_src_buffer);
     GpuMat extended_src(esize, src.type(), extended_src_buffer.ptr(), extended_src_buffer.step);
 
-    cv::gpu::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), s);
+    cv::cuda::copyMakeBorder(src, extended_src, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), s);
     GpuMat src_hdr = extended_src(Rect(Point2i(border_size, border_size), src.size()));
 
     int bcols, brows;
     cudev::imgproc::nln_fast_get_buffer_size(src_hdr, search_window, block_window, bcols, brows);
     buffer.create(brows, bcols, CV_32S);
 
-    using namespace cv::gpu::cudev::imgproc;
+    using namespace cv::cuda::cudev::imgproc;
     typedef void (*nlm_fast_t)(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
     static const nlm_fast_t funcs[] = { nlm_fast_gpu<uchar>, nlm_fast_gpu<uchar2>, nlm_fast_gpu<uchar3>, 0};
 
@@ -137,12 +137,12 @@ void cv::gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat
     funcs[src.channels()-1](src_hdr, dst, buffer, search_window, block_window, h, StreamAccessor::getStream(s));
 }
 
-void cv::gpu::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window, int block_window, Stream& s)
+void cv::cuda::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window, int block_window, Stream& s)
 {
     CV_Assert(src.type() == CV_8UC3);
 
     lab.create(src.size(), src.type());
-    cv::gpu::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, s);
+    cv::cuda::cvtColor(src, lab, cv::COLOR_BGR2Lab, 0, s);
 
     l.create(src.size(), CV_8U);
     ab.create(src.size(), CV_8UC2);
@@ -152,7 +152,7 @@ void cv::gpu::FastNonLocalMeansDenoising::labMethod( const GpuMat& src, GpuMat&
     simpleMethod(ab, ab, h_color, search_window, block_window, s);
 
     cudev::imgproc::fnlm_merge_channels(l, ab, lab, StreamAccessor::getStream(s));
-    cv::gpu::cvtColor(lab, dst, cv::COLOR_Lab2BGR, 0, s);
+    cv::cuda::cvtColor(lab, dst, cv::COLOR_Lab2BGR, 0, s);
 }
 
 #endif
diff --git a/modules/photo/test/test_denoising_gpu.cpp b/modules/photo/test/test_denoising_gpu.cpp
index f8de826f2a..fe2d041810 100644
--- a/modules/photo/test/test_denoising_gpu.cpp
+++ b/modules/photo/test/test_denoising_gpu.cpp
@@ -57,7 +57,7 @@ using namespace cvtest;
 
 TEST(GPU_BruteForceNonLocalMeans, Regression)
 {
-    using cv::gpu::GpuMat;
+    using cv::cuda::GpuMat;
 
     cv::Mat bgr  = readImage("../gpu/denoising/lena_noised_gaussian_sigma=20_multi_0.png", cv::IMREAD_COLOR);
     ASSERT_FALSE(bgr.empty());
@@ -66,8 +66,8 @@ TEST(GPU_BruteForceNonLocalMeans, Regression)
     cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY);
 
     GpuMat dbgr, dgray;
-    cv::gpu::nonLocalMeans(GpuMat(bgr),  dbgr, 20);
-    cv::gpu::nonLocalMeans(GpuMat(gray), dgray, 20);
+    cv::cuda::nonLocalMeans(GpuMat(bgr),  dbgr, 20);
+    cv::cuda::nonLocalMeans(GpuMat(gray), dgray, 20);
 
 #if 0
     dumpImage("../gpu/denoising/nlm_denoised_lena_bgr.png", cv::Mat(dbgr));
@@ -87,7 +87,7 @@ TEST(GPU_BruteForceNonLocalMeans, Regression)
 
 TEST(GPU_FastNonLocalMeans, Regression)
 {
-    using cv::gpu::GpuMat;
+    using cv::cuda::GpuMat;
 
     cv::Mat bgr  = readImage("../gpu/denoising/lena_noised_gaussian_sigma=20_multi_0.png", cv::IMREAD_COLOR);
     ASSERT_FALSE(bgr.empty());
@@ -96,7 +96,7 @@ TEST(GPU_FastNonLocalMeans, Regression)
     cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY);
 
     GpuMat dbgr, dgray;
-    cv::gpu::FastNonLocalMeansDenoising fnlmd;
+    cv::cuda::FastNonLocalMeansDenoising fnlmd;
 
     fnlmd.simpleMethod(GpuMat(gray),  dgray, 20);
     fnlmd.labMethod(GpuMat(bgr),  dbgr, 20, 10);
diff --git a/modules/softcascade/include/opencv2/softcascade.hpp b/modules/softcascade/include/opencv2/softcascade.hpp
index 60f33aa052..0270080c8a 100644
--- a/modules/softcascade/include/opencv2/softcascade.hpp
+++ b/modules/softcascade/include/opencv2/softcascade.hpp
@@ -233,7 +233,7 @@ public:
     // Param frame is an input 3-channel bgr image.
     // Param channels is a GPU matrix of optionally shrinked channels
     // Param stream is stream is a high-level CUDA stream abstraction used for asynchronous execution.
-    virtual void apply(InputArray frame, OutputArray channels, cv::gpu::Stream& stream = cv::gpu::Stream::Null()) = 0;
+    virtual void apply(InputArray frame, OutputArray channels, cv::cuda::Stream& stream = cv::cuda::Stream::Null()) = 0;
 
     // Creates a specific preprocessor implementation.
     // Param shrinkage is a resizing factor. Resize is applied before the computing integral sum
@@ -280,7 +280,7 @@ public:
     // Param objects is an output array of Detections represented as GpuMat of detections (SCascade::Detection)
     //    The first element of the matrix is  actually a count of detections.
     // Param stream is stream is a high-level CUDA stream abstraction used for asynchronous execution
-    virtual void detect(InputArray image, InputArray rois, OutputArray objects, cv::gpu::Stream& stream = cv::gpu::Stream::Null()) const;
+    virtual void detect(InputArray image, InputArray rois, OutputArray objects, cv::cuda::Stream& stream = cv::cuda::Stream::Null()) const;
 
 private:
 
diff --git a/modules/softcascade/perf/perf_cuda_softcascade.cpp b/modules/softcascade/perf/perf_cuda_softcascade.cpp
index 09f1aad0bf..ab3993342a 100644
--- a/modules/softcascade/perf/perf_cuda_softcascade.cpp
+++ b/modules/softcascade/perf/perf_cuda_softcascade.cpp
@@ -37,7 +37,7 @@ namespace {
         }
     };
 
-    cv::Mat sortDetections(cv::gpu::GpuMat& objects)
+    cv::Mat sortDetections(cv::cuda::GpuMat& objects)
     {
         cv::Mat detections(objects);
 
@@ -64,7 +64,7 @@ RUN_GPU(SCascadeTest, detect)
 {
     cv::Mat cpu = cv::imread(getDataPath(get<1>(GetParam())));;
     ASSERT_FALSE(cpu.empty());
-    cv::gpu::GpuMat colored(cpu);
+    cv::cuda::GpuMat colored(cpu);
 
     cv::softcascade::SCascade cascade;
 
@@ -73,7 +73,7 @@ RUN_GPU(SCascadeTest, detect)
 
     ASSERT_TRUE(cascade.load(fs.getFirstTopLevelNode()));
 
-    cv::gpu::GpuMat objectBoxes(1, 10000 * sizeof(cv::softcascade::Detection), CV_8UC1), rois(colored.size(), CV_8UC1);
+    cv::cuda::GpuMat objectBoxes(1, 10000 * sizeof(cv::softcascade::Detection), CV_8UC1), rois(colored.size(), CV_8UC1);
     rois.setTo(1);
 
     cascade.detect(colored, rois, objectBoxes);
@@ -122,7 +122,7 @@ RUN_GPU(SCascadeTestRoi, detectInRoi)
 {
     cv::Mat cpu = cv::imread(getDataPath(get<1>(GetParam())));
     ASSERT_FALSE(cpu.empty());
-    cv::gpu::GpuMat colored(cpu);
+    cv::cuda::GpuMat colored(cpu);
 
     cv::softcascade::SCascade cascade;
 
@@ -131,7 +131,7 @@ RUN_GPU(SCascadeTestRoi, detectInRoi)
 
     ASSERT_TRUE(cascade.load(fs.getFirstTopLevelNode()));
 
-    cv::gpu::GpuMat objectBoxes(1, 16384 * 20, CV_8UC1), rois(colored.size(), CV_8UC1);
+    cv::cuda::GpuMat objectBoxes(1, 16384 * 20, CV_8UC1), rois(colored.size(), CV_8UC1);
     rois.setTo(0);
 
     int nroi = get<2>(GetParam());
@@ -139,7 +139,7 @@ RUN_GPU(SCascadeTestRoi, detectInRoi)
     for (int i = 0; i < nroi; ++i)
     {
         cv::Rect r = getFromTable(rng(10));
-        cv::gpu::GpuMat sub(rois, r);
+        cv::cuda::GpuMat sub(rois, r);
         sub.setTo(1);
     }
 
@@ -167,7 +167,7 @@ RUN_GPU(SCascadeTestRoi, detectEachRoi)
 {
     cv::Mat cpu = cv::imread(getDataPath(get<1>(GetParam())));
     ASSERT_FALSE(cpu.empty());
-    cv::gpu::GpuMat colored(cpu);
+    cv::cuda::GpuMat colored(cpu);
 
     cv::softcascade::SCascade cascade;
 
@@ -176,12 +176,12 @@ RUN_GPU(SCascadeTestRoi, detectEachRoi)
 
     ASSERT_TRUE(cascade.load(fs.getFirstTopLevelNode()));
 
-    cv::gpu::GpuMat objectBoxes(1, 16384 * 20, CV_8UC1), rois(colored.size(), CV_8UC1);
+    cv::cuda::GpuMat objectBoxes(1, 16384 * 20, CV_8UC1), rois(colored.size(), CV_8UC1);
     rois.setTo(0);
 
     int idx = get<2>(GetParam());
     cv::Rect r = getFromTable(idx);
-    cv::gpu::GpuMat sub(rois, r);
+    cv::cuda::GpuMat sub(rois, r);
     sub.setTo(1);
 
     cascade.detect(colored, rois, objectBoxes);
@@ -206,7 +206,7 @@ RUN_GPU(SCascadeTest, detectStream)
 {
     cv::Mat cpu = cv::imread(getDataPath(get<1>(GetParam())));
     ASSERT_FALSE(cpu.empty());
-    cv::gpu::GpuMat colored(cpu);
+    cv::cuda::GpuMat colored(cpu);
 
     cv::softcascade::SCascade cascade;
 
@@ -215,10 +215,10 @@ RUN_GPU(SCascadeTest, detectStream)
 
     ASSERT_TRUE(cascade.load(fs.getFirstTopLevelNode()));
 
-    cv::gpu::GpuMat objectBoxes(1, 10000 * sizeof(cv::softcascade::Detection), CV_8UC1), rois(colored.size(), CV_8UC1);
+    cv::cuda::GpuMat objectBoxes(1, 10000 * sizeof(cv::softcascade::Detection), CV_8UC1), rois(colored.size(), CV_8UC1);
     rois.setTo(1);
 
-    cv::gpu::Stream s;
+    cv::cuda::Stream s;
 
     cascade.detect(colored, rois, objectBoxes, s);
 
diff --git a/modules/softcascade/src/cuda/channels.cu b/modules/softcascade/src/cuda/channels.cu
index 78489e1f8b..bf922c599c 100644
--- a/modules/softcascade/src/cuda/channels.cu
+++ b/modules/softcascade/src/cuda/channels.cu
@@ -59,7 +59,7 @@ namespace cv { namespace softcascade { namespace cudev
         return bytes;
     }
 
-    __global__ void shfl_integral_horizontal(const cv::gpu::PtrStep<uint4> img, cv::gpu::PtrStep<uint4> integral)
+    __global__ void shfl_integral_horizontal(const cv::cuda::PtrStep<uint4> img, cv::cuda::PtrStep<uint4> integral)
     {
     #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
         __shared__ int sums[128];
@@ -299,7 +299,7 @@ namespace cv { namespace softcascade { namespace cudev
     // The final set of sums from the block is then propgated, with the block
     // computing "down" the image and adding the running sum to the local
     // block sums.
-    __global__ void shfl_integral_vertical(cv::gpu::PtrStepSz<unsigned int> integral)
+    __global__ void shfl_integral_vertical(cv::cuda::PtrStepSz<unsigned int> integral)
     {
     #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
         __shared__ unsigned int sums[32][9];
@@ -357,7 +357,7 @@ namespace cv { namespace softcascade { namespace cudev
     #endif
     }
 
-    void shfl_integral(const cv::gpu::PtrStepSzb& img, cv::gpu::PtrStepSz<unsigned int> integral, cudaStream_t stream)
+    void shfl_integral(const cv::cuda::PtrStepSzb& img, cv::cuda::PtrStepSz<unsigned int> integral, cudaStream_t stream)
     {
         {
             // each thread handles 16 values, use 1 block/row
@@ -369,13 +369,13 @@ namespace cv { namespace softcascade { namespace cudev
 
             cudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );
 
-            shfl_integral_horizontal<<<grid, block, 0, stream>>>((const cv::gpu::PtrStepSz<uint4>) img, (cv::gpu::PtrStepSz<uint4>) integral);
+            shfl_integral_horizontal<<<grid, block, 0, stream>>>((const cv::cuda::PtrStepSz<uint4>) img, (cv::cuda::PtrStepSz<uint4>) integral);
             cudaSafeCall( cudaGetLastError() );
         }
 
         {
             const dim3 block(32, 8);
-            const dim3 grid(cv::gpu::cudev::divUp(integral.cols, block.x), 1);
+            const dim3 grid(cv::cuda::cudev::divUp(integral.cols, block.x), 1);
 
             shfl_integral_vertical<<<grid, block, 0, stream>>>(integral);
             cudaSafeCall( cudaGetLastError() );
@@ -385,7 +385,7 @@ namespace cv { namespace softcascade { namespace cudev
             cudaSafeCall( cudaDeviceSynchronize() );
     }
 
-    __global__ void shfl_integral_vertical(cv::gpu::PtrStepSz<unsigned int> buffer, cv::gpu::PtrStepSz<unsigned int> integral)
+    __global__ void shfl_integral_vertical(cv::cuda::PtrStepSz<unsigned int> buffer, cv::cuda::PtrStepSz<unsigned int> integral)
     {
     #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
         __shared__ unsigned int sums[32][9];
@@ -445,7 +445,7 @@ namespace cv { namespace softcascade { namespace cudev
     }
 
     // used for frame preprocessing before Soft Cascade evaluation: no synchronization needed
-    void shfl_integral_gpu_buffered(cv::gpu::PtrStepSzb img, cv::gpu::PtrStepSz<uint4> buffer, cv::gpu::PtrStepSz<unsigned int> integral,
+    void shfl_integral_gpu_buffered(cv::cuda::PtrStepSzb img, cv::cuda::PtrStepSz<uint4> buffer, cv::cuda::PtrStepSz<unsigned int> integral,
         int blockStep, cudaStream_t stream)
     {
         {
@@ -454,15 +454,15 @@ namespace cv { namespace softcascade { namespace cudev
 
             cudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );
 
-            shfl_integral_horizontal<<<grid, block, 0, stream>>>((cv::gpu::PtrStepSz<uint4>) img, buffer);
+            shfl_integral_horizontal<<<grid, block, 0, stream>>>((cv::cuda::PtrStepSz<uint4>) img, buffer);
             cudaSafeCall( cudaGetLastError() );
         }
 
         {
             const dim3 block(32, 8);
-            const dim3 grid(cv::gpu::cudev::divUp(integral.cols, block.x), 1);
+            const dim3 grid(cv::cuda::cudev::divUp(integral.cols, block.x), 1);
 
-            shfl_integral_vertical<<<grid, block, 0, stream>>>((cv::gpu::PtrStepSz<unsigned int>)buffer, integral);
+            shfl_integral_vertical<<<grid, block, 0, stream>>>((cv::cuda::PtrStepSz<unsigned int>)buffer, integral);
             cudaSafeCall( cudaGetLastError() );
         }
     }
@@ -486,7 +486,7 @@ namespace cv { namespace softcascade { namespace cudev
         return CV_DESCALE((unsigned int)(b * B2Y + g * G2Y + r * R2Y), yuv_shift);
     }
 
-    __global__ void device_transform(const cv::gpu::PtrStepSz<uchar3> bgr, cv::gpu::PtrStepSzb gray)
+    __global__ void device_transform(const cv::cuda::PtrStepSz<uchar3> bgr, cv::cuda::PtrStepSzb gray)
     {
         const int y = blockIdx.y * blockDim.y + threadIdx.y;
         const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -497,10 +497,10 @@ namespace cv { namespace softcascade { namespace cudev
     }
 
     ///////
-    void transform(const cv::gpu::PtrStepSz<uchar3>& bgr, cv::gpu::PtrStepSzb gray)
+    void transform(const cv::cuda::PtrStepSz<uchar3>& bgr, cv::cuda::PtrStepSzb gray)
     {
         const dim3 block(32, 8);
-        const dim3 grid(cv::gpu::cudev::divUp(bgr.cols, block.x), cv::gpu::cudev::divUp(bgr.rows, block.y));
+        const dim3 grid(cv::cuda::cudev::divUp(bgr.cols, block.x), cv::cuda::cudev::divUp(bgr.rows, block.y));
         device_transform<<<grid, block>>>(bgr, gray);
         cudaSafeCall(cudaDeviceSynchronize());
     }
diff --git a/modules/softcascade/src/cuda/icf-sc.cu b/modules/softcascade/src/cuda/icf-sc.cu
index b119209dbe..8781734667 100644
--- a/modules/softcascade/src/cuda/icf-sc.cu
+++ b/modules/softcascade/src/cuda/icf-sc.cu
@@ -76,7 +76,7 @@ typedef unsigned char uchar;
         shrank[ y * outPitch + x] = shrink<FACTOR>(ptr, inPitch, y, x);
     }
 
-    void shrink(const cv::gpu::PtrStepSzb& channels, cv::gpu::PtrStepSzb shrunk)
+    void shrink(const cv::cuda::PtrStepSzb& channels, cv::cuda::PtrStepSzb shrunk)
     {
         dim3 block(32, 8);
         dim3 grid(shrunk.cols / 32, shrunk.rows / 8);
@@ -124,7 +124,7 @@ typedef unsigned char uchar;
         luvg[luvgPitch * (y + 2 * 480) + x] = v;
     }
 
-    void bgr2Luv(const cv::gpu::PtrStepSzb& bgr, cv::gpu::PtrStepSzb luv)
+    void bgr2Luv(const cv::cuda::PtrStepSzb& bgr, cv::cuda::PtrStepSzb luv)
     {
         dim3 block(32, 8);
         dim3 grid(bgr.cols / 32, bgr.rows / 8);
@@ -206,7 +206,7 @@ typedef unsigned char uchar;
     texture<uchar,  cudaTextureType2D, cudaReadModeElementType> tgray;
 
     template<bool isDefaultNum>
-    __global__ void gray2hog(cv::gpu::PtrStepSzb mag)
+    __global__ void gray2hog(cv::cuda::PtrStepSzb mag)
     {
         const int x = blockIdx.x * blockDim.x + threadIdx.x;
         const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -221,7 +221,7 @@ typedef unsigned char uchar;
         mag( 480 * fast_angle_bin<isDefaultNum>(dy, dx) + y, x) = cmag;
     }
 
-    void gray2hog(const cv::gpu::PtrStepSzb& gray, cv::gpu::PtrStepSzb mag, const int bins)
+    void gray2hog(const cv::cuda::PtrStepSzb& gray, cv::cuda::PtrStepSzb mag, const int bins)
     {
         dim3 block(32, 8);
         dim3 grid(gray.cols / 32, gray.rows / 8);
@@ -250,7 +250,7 @@ typedef unsigned char uchar;
         hog[((fh * bin) + y) * hogPitch + x] = val;
     }
 
-    void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
+    void fillBins(cv::cuda::PtrStepSzb hogluv, const cv::cuda::PtrStepSzf& nangle,
                   const int fw,  const int fh, const int bins, cudaStream_t stream )
     {
         const uchar* mag = (const uchar*)hogluv.ptr(fh * bins);
@@ -324,8 +324,8 @@ typedef unsigned char uchar;
         }
     }
 
-    void suppress(const cv::gpu::PtrStepSzb& objects, cv::gpu::PtrStepSzb overlaps, cv::gpu::PtrStepSzi ndetections,
-        cv::gpu::PtrStepSzb suppressed, cudaStream_t stream)
+    void suppress(const cv::cuda::PtrStepSzb& objects, cv::cuda::PtrStepSzb overlaps, cv::cuda::PtrStepSzi ndetections,
+        cv::cuda::PtrStepSzb suppressed, cudaStream_t stream)
     {
         int block = 192;
         int grid = 1;
@@ -527,8 +527,8 @@ __global__ void soft_cascade(const CascadeInvoker<Policy> invoker, Detection* ob
 }
 
 template<typename Policy>
-void CascadeInvoker<Policy>::operator()(const cv::gpu::PtrStepSzb& roi, const cv::gpu::PtrStepSzi& hogluv,
-    cv::gpu::PtrStepSz<uchar4> objects, const int downscales, const cudaStream_t& stream) const
+void CascadeInvoker<Policy>::operator()(const cv::cuda::PtrStepSzb& roi, const cv::cuda::PtrStepSzi& hogluv,
+    cv::cuda::PtrStepSz<uchar4> objects, const int downscales, const cudaStream_t& stream) const
 {
     int fw = roi.rows;
     int fh = roi.cols;
@@ -560,7 +560,7 @@ void CascadeInvoker<Policy>::operator()(const cv::gpu::PtrStepSzb& roi, const cv
     }
 }
 
-template void CascadeInvoker<GK107PolicyX4>::operator()(const cv::gpu::PtrStepSzb& roi, const cv::gpu::PtrStepSzi& hogluv,
-    cv::gpu::PtrStepSz<uchar4> objects, const int downscales, const cudaStream_t& stream) const;
+template void CascadeInvoker<GK107PolicyX4>::operator()(const cv::cuda::PtrStepSzb& roi, const cv::cuda::PtrStepSzi& hogluv,
+    cv::cuda::PtrStepSz<uchar4> objects, const int downscales, const cudaStream_t& stream) const;
 
 }}}
diff --git a/modules/softcascade/src/cuda_invoker.hpp b/modules/softcascade/src/cuda_invoker.hpp
index 92f30d01d2..bd478ee286 100644
--- a/modules/softcascade/src/cuda_invoker.hpp
+++ b/modules/softcascade/src/cuda_invoker.hpp
@@ -128,8 +128,8 @@ struct CascadeInvoker
 {
     CascadeInvoker(): levels(0), stages(0), nodes(0), leaves(0), scales(0) {}
 
-    CascadeInvoker(const cv::gpu::PtrStepSzb& _levels, const cv::gpu::PtrStepSzf& _stages,
-                   const cv::gpu::PtrStepSzb& _nodes,  const cv::gpu::PtrStepSzf& _leaves)
+    CascadeInvoker(const cv::cuda::PtrStepSzb& _levels, const cv::cuda::PtrStepSzf& _stages,
+                   const cv::cuda::PtrStepSzb& _nodes,  const cv::cuda::PtrStepSzf& _leaves)
     : levels((const Level*)_levels.ptr()),
       stages((const float*)_stages.ptr()),
       nodes((const Node*)_nodes.ptr()), leaves((const float*)_leaves.ptr()),
@@ -144,7 +144,7 @@ struct CascadeInvoker
 
     int scales;
 
-    void operator()(const cv::gpu::PtrStepSzb& roi, const cv::gpu::PtrStepSzi& hogluv, cv::gpu::PtrStepSz<uchar4> objects,
+    void operator()(const cv::cuda::PtrStepSzb& roi, const cv::cuda::PtrStepSzi& hogluv, cv::cuda::PtrStepSz<uchar4> objects,
         const int downscales, const cudaStream_t& stream = 0) const;
 
     template<bool isUp>
diff --git a/modules/softcascade/src/detector_cuda.cpp b/modules/softcascade/src/detector_cuda.cpp
index a3a1512e24..6747c232ca 100644
--- a/modules/softcascade/src/detector_cuda.cpp
+++ b/modules/softcascade/src/detector_cuda.cpp
@@ -50,7 +50,7 @@ cv::softcascade::SCascade::~SCascade() { throw_no_cuda(); }
 
 bool cv::softcascade::SCascade::load(const FileNode&) { throw_no_cuda(); return false;}
 
-void cv::softcascade::SCascade::detect(InputArray, InputArray, OutputArray, cv::gpu::Stream&) const { throw_no_cuda(); }
+void cv::softcascade::SCascade::detect(InputArray, InputArray, OutputArray, cv::cuda::Stream&) const { throw_no_cuda(); }
 
 void cv::softcascade::SCascade::read(const FileNode& fn) { Algorithm::read(fn); }
 
@@ -85,18 +85,18 @@ cv::softcascade::cudev::Level::Level(int idx, const Octave& oct, const float sca
 
 namespace cv { namespace softcascade { namespace cudev {
 
-    void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
+    void fillBins(cv::cuda::PtrStepSzb hogluv, const cv::cuda::PtrStepSzf& nangle,
         const int fw, const int fh, const int bins, cudaStream_t stream);
 
-    void suppress(const cv::gpu::PtrStepSzb& objects, cv::gpu::PtrStepSzb overlaps, cv::gpu::PtrStepSzi ndetections,
-        cv::gpu::PtrStepSzb suppressed, cudaStream_t stream);
+    void suppress(const cv::cuda::PtrStepSzb& objects, cv::cuda::PtrStepSzb overlaps, cv::cuda::PtrStepSzi ndetections,
+        cv::cuda::PtrStepSzb suppressed, cudaStream_t stream);
 
-    void bgr2Luv(const cv::gpu::PtrStepSzb& bgr, cv::gpu::PtrStepSzb luv);
-    void transform(const cv::gpu::PtrStepSz<uchar3>& bgr, cv::gpu::PtrStepSzb gray);
-    void gray2hog(const cv::gpu::PtrStepSzb& gray, cv::gpu::PtrStepSzb mag, const int bins);
-    void shrink(const cv::gpu::PtrStepSzb& channels, cv::gpu::PtrStepSzb shrunk);
+    void bgr2Luv(const cv::cuda::PtrStepSzb& bgr, cv::cuda::PtrStepSzb luv);
+    void transform(const cv::cuda::PtrStepSz<uchar3>& bgr, cv::cuda::PtrStepSzb gray);
+    void gray2hog(const cv::cuda::PtrStepSzb& gray, cv::cuda::PtrStepSzb mag, const int bins);
+    void shrink(const cv::cuda::PtrStepSzb& channels, cv::cuda::PtrStepSzb shrunk);
 
-    void shfl_integral(const cv::gpu::PtrStepSzb& img, cv::gpu::PtrStepSz<unsigned int> integral, cudaStream_t stream);
+    void shfl_integral(const cv::cuda::PtrStepSzb& img, cv::cuda::PtrStepSz<unsigned int> integral, cudaStream_t stream);
 }}}
 
 struct cv::softcascade::SCascade::Fields
@@ -333,7 +333,7 @@ struct cv::softcascade::SCascade::Fields
         preprocessor = ChannelsProcessor::create(shrinkage, 6, method);
     }
 
-    void detect(cv::gpu::GpuMat& objects, cv::gpu::Stream& s) const
+    void detect(cv::cuda::GpuMat& objects, cv::cuda::Stream& s) const
     {
         objects.setTo(Scalar::all(0), s);
 
@@ -342,19 +342,19 @@ struct cv::softcascade::SCascade::Fields
         cudev::CascadeInvoker<cudev::GK107PolicyX4> invoker
         = cudev::CascadeInvoker<cudev::GK107PolicyX4>(levels, stages, nodes, leaves);
 
-        cudaStream_t stream = cv::gpu::StreamAccessor::getStream(s);
+        cudaStream_t stream = cv::cuda::StreamAccessor::getStream(s);
         invoker(mask, hogluv, objects, downscales, stream);
     }
 
-    void suppress(cv::gpu::GpuMat& objects, cv::gpu::Stream& s)
+    void suppress(cv::cuda::GpuMat& objects, cv::cuda::Stream& s)
     {
-        cv::gpu::GpuMat ndetections = cv::gpu::GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
+        cv::cuda::GpuMat ndetections = cv::cuda::GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
         ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
 
         overlaps.setTo(0, s);
         suppressed.setTo(0, s);
 
-        cudaStream_t stream = cv::gpu::StreamAccessor::getStream(s);
+        cudaStream_t stream = cv::cuda::StreamAccessor::getStream(s);
         cudev::suppress(objects, overlaps, ndetections, suppressed, stream);
     }
 
@@ -398,34 +398,34 @@ public:
 
 
     // 160x120x10
-    cv::gpu::GpuMat shrunk;
+    cv::cuda::GpuMat shrunk;
 
     // temporal mat for integral
-    cv::gpu::GpuMat integralBuffer;
+    cv::cuda::GpuMat integralBuffer;
 
     // 161x121x10
-    cv::gpu::GpuMat hogluv;
+    cv::cuda::GpuMat hogluv;
 
 
     // used for suppression
-    cv::gpu::GpuMat suppressed;
+    cv::cuda::GpuMat suppressed;
     // used for area overlap computing during
-    cv::gpu::GpuMat overlaps;
+    cv::cuda::GpuMat overlaps;
 
 
     // Cascade from xml
-    cv::gpu::GpuMat octaves;
-    cv::gpu::GpuMat stages;
-    cv::gpu::GpuMat nodes;
-    cv::gpu::GpuMat leaves;
-    cv::gpu::GpuMat levels;
+    cv::cuda::GpuMat octaves;
+    cv::cuda::GpuMat stages;
+    cv::cuda::GpuMat nodes;
+    cv::cuda::GpuMat leaves;
+    cv::cuda::GpuMat levels;
 
 
     // For ROI
-    cv::gpu::GpuMat mask;
-    cv::gpu::GpuMat genRoiTmp;
+    cv::cuda::GpuMat mask;
+    cv::cuda::GpuMat genRoiTmp;
 
-//     cv::gpu::GpuMat collected;
+//     cv::cuda::GpuMat collected;
 
 
     std::vector<cudev::Octave> voctaves;
@@ -458,18 +458,18 @@ bool cv::softcascade::SCascade::load(const FileNode& fn)
 
 namespace {
 
-void integral(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& sum, cv::gpu::GpuMat& buffer, cv::gpu::Stream& s)
+void integral(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& sum, cv::cuda::GpuMat& buffer, cv::cuda::Stream& s)
 {
     CV_Assert(src.type() == CV_8UC1);
 
-    cudaStream_t stream = cv::gpu::StreamAccessor::getStream(s);
+    cudaStream_t stream = cv::cuda::StreamAccessor::getStream(s);
 
     cv::Size whole;
     cv::Point offset;
 
     src.locateROI(whole, offset);
 
-    if (cv::gpu::deviceSupports(cv::gpu::WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
+    if (cv::cuda::deviceSupports(cv::cuda::WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
         && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))
     {
         ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
@@ -479,8 +479,8 @@ void integral(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& sum, cv::gpu::GpuMat&
         sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
         sum.setTo(cv::Scalar::all(0), s);
 
-        cv::gpu::GpuMat inner = sum(cv::Rect(1, 1, src.cols, src.rows));
-        cv::gpu::GpuMat res = buffer(cv::Rect(0, 0, src.cols, src.rows));
+        cv::cuda::GpuMat inner = sum(cv::Rect(1, 1, src.cols, src.rows));
+        cv::cuda::GpuMat res = buffer(cv::Rect(0, 0, src.cols, src.rows));
 
         res.copyTo(inner, s);
     }
@@ -489,7 +489,7 @@ void integral(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& sum, cv::gpu::GpuMat&
 
 }
 
-void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, OutputArray _objects, cv::gpu::Stream& s) const
+void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, OutputArray _objects, cv::cuda::Stream& s) const
 {
     CV_Assert(fields);
 
@@ -497,11 +497,11 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp
     int type = _image.type();
     CV_Assert(type == CV_8UC3 || type == CV_32SC1 || (!_rois.empty()));
 
-    const cv::gpu::GpuMat image = _image.getGpuMat();
+    const cv::cuda::GpuMat image = _image.getGpuMat();
 
     if (_objects.empty()) _objects.create(1, 4096 * sizeof(Detection), CV_8UC1);
 
-    cv::gpu::GpuMat rois = _rois.getGpuMat(), objects = _objects.getGpuMat();
+    cv::cuda::GpuMat rois = _rois.getGpuMat(), objects = _objects.getGpuMat();
 
     /// roi
     Fields& flds = *fields;
@@ -510,7 +510,7 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp
     flds.mask.create( rois.cols / shr, rois.rows / shr, rois.type());
 
     cudev::shrink(rois, flds.mask);
-    //cv::gpu::transpose(flds.genRoiTmp, flds.mask, s);
+    //cv::cuda::transpose(flds.genRoiTmp, flds.mask, s);
 
     if (type == CV_8UC3)
     {
@@ -531,7 +531,7 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp
 
     if ( (flags && NMS_MASK) != NO_REJECT)
     {
-        cv::gpu::GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows));
+        cv::cuda::GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows));
         flds.suppress(objects, s);
         flds.suppressed.copyTo(spr);
     }
@@ -546,10 +546,10 @@ namespace {
 
 using cv::InputArray;
 using cv::OutputArray;
-using cv::gpu::Stream;
-using cv::gpu::GpuMat;
+using cv::cuda::Stream;
+using cv::cuda::GpuMat;
 
-inline void setZero(cv::gpu::GpuMat& m, cv::gpu::Stream& s)
+inline void setZero(cv::cuda::GpuMat& m, cv::cuda::Stream& s)
 {
     m.setTo(0, s);
 }
@@ -559,22 +559,22 @@ struct SeparablePreprocessor : public cv::softcascade::ChannelsProcessor
     SeparablePreprocessor(const int s, const int b) : cv::softcascade::ChannelsProcessor(), shrinkage(s), bins(b) {}
     virtual ~SeparablePreprocessor() {}
 
-    virtual void apply(InputArray _frame, OutputArray _shrunk, cv::gpu::Stream& s = cv::gpu::Stream::Null())
+    virtual void apply(InputArray _frame, OutputArray _shrunk, cv::cuda::Stream& s = cv::cuda::Stream::Null())
     {
         bgr = _frame.getGpuMat();
-        //cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0);
+        //cv::cuda::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0);
 
         _shrunk.create(bgr.rows * (4 + bins) / shrinkage, bgr.cols / shrinkage, CV_8UC1);
-        cv::gpu::GpuMat shrunk = _shrunk.getGpuMat();
+        cv::cuda::GpuMat shrunk = _shrunk.getGpuMat();
 
         channels.create(bgr.rows * (4 + bins), bgr.cols, CV_8UC1);
         setZero(channels, s);
 
         gray.create(bgr.size(), CV_8UC1);
-        cv::softcascade::cudev::transform(bgr, gray); //cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
+        cv::softcascade::cudev::transform(bgr, gray); //cv::cuda::cvtColor(bgr, gray, CV_BGR2GRAY);
         cv::softcascade::cudev::gray2hog(gray, channels(cv::Rect(0, 0, bgr.cols, bgr.rows * (bins + 1))), bins);
 
-        cv::gpu::GpuMat luv(channels, cv::Rect(0, bgr.rows * (bins + 1), bgr.cols, bgr.rows * 3));
+        cv::cuda::GpuMat luv(channels, cv::Rect(0, bgr.rows * (bins + 1), bgr.cols, bgr.rows * 3));
         cv::softcascade::cudev::bgr2Luv(bgr, luv);
         cv::softcascade::cudev::shrink(channels, shrunk);
     }
@@ -583,9 +583,9 @@ private:
     const int shrinkage;
     const int bins;
 
-    cv::gpu::GpuMat bgr;
-    cv::gpu::GpuMat gray;
-    cv::gpu::GpuMat channels;
+    cv::cuda::GpuMat bgr;
+    cv::cuda::GpuMat gray;
+    cv::cuda::GpuMat channels;
     SeparablePreprocessor& operator=( const SeparablePreprocessor& );
 };
 
diff --git a/modules/softcascade/test/test_cuda_softcascade.cpp b/modules/softcascade/test/test_cuda_softcascade.cpp
index 5973d25f21..548d7a5752 100644
--- a/modules/softcascade/test/test_cuda_softcascade.cpp
+++ b/modules/softcascade/test/test_cuda_softcascade.cpp
@@ -156,11 +156,11 @@ namespace
 #endif
 }
 
-class SCascadeTestRoi : public ::testing::TestWithParam<std::tr1::tuple<cv::gpu::DeviceInfo, std::string, std::string, int> >
+class SCascadeTestRoi : public ::testing::TestWithParam<std::tr1::tuple<cv::cuda::DeviceInfo, std::string, std::string, int> >
 {
     virtual void SetUp()
     {
-        cv::gpu::setDevice(get<0>(GetParam()).deviceID());
+        cv::cuda::setDevice(get<0>(GetParam()).deviceID());
     }
 };
 
@@ -176,7 +176,7 @@ TEST_P(SCascadeTestRoi, Detect)
 
     ASSERT_TRUE(cascade.load(fs.getFirstTopLevelNode()));
 
-    cv::gpu::GpuMat colored(coloredCpu), objectBoxes(1, 16384, CV_8UC1), rois(colored.size(), CV_8UC1);
+    cv::cuda::GpuMat colored(coloredCpu), objectBoxes(1, 16384, CV_8UC1), rois(colored.size(), CV_8UC1);
     rois.setTo(0);
 
     int nroi = get<3>(GetParam());
@@ -185,7 +185,7 @@ TEST_P(SCascadeTestRoi, Detect)
     for (int i = 0; i < nroi; ++i)
     {
         cv::Rect r = getFromTable(rng(10));
-        cv::gpu::GpuMat sub(rois, r);
+        cv::cuda::GpuMat sub(rois, r);
         sub.setTo(1);
         cv::rectangle(result, r, cv::Scalar(0, 0, 255, 255), 1);
     }
@@ -230,7 +230,7 @@ struct Fixture
 };
 }
 
-typedef std::tr1::tuple<cv::gpu::DeviceInfo, Fixture> SCascadeTestAllFixture;
+typedef std::tr1::tuple<cv::cuda::DeviceInfo, Fixture> SCascadeTestAllFixture;
 class SCascadeTestAll : public ::testing::TestWithParam<SCascadeTestAllFixture>
 {
 protected:
@@ -239,7 +239,7 @@ protected:
 
     virtual void SetUp()
     {
-        cv::gpu::setDevice(get<0>(GetParam()).deviceID());
+        cv::cuda::setDevice(get<0>(GetParam()).deviceID());
         xml = path(get<1>(GetParam()).path);
         expected = get<1>(GetParam()).expected;
     }
@@ -257,7 +257,7 @@ TEST_P(SCascadeTestAll, detect)
     cv::Mat coloredCpu = cv::imread(path("images/image_00000000_0.png"));
     ASSERT_FALSE(coloredCpu.empty());
 
-    cv::gpu::GpuMat colored(coloredCpu), objectBoxes, rois(colored.size(), CV_8UC1);
+    cv::cuda::GpuMat colored(coloredCpu), objectBoxes, rois(colored.size(), CV_8UC1);
     rois.setTo(1);
 
     cascade.detect(colored, rois, objectBoxes);
@@ -294,10 +294,10 @@ TEST_P(SCascadeTestAll, detectStream)
     cv::Mat coloredCpu = cv::imread(path("images/image_00000000_0.png"));
     ASSERT_FALSE(coloredCpu.empty());
 
-    cv::gpu::GpuMat colored(coloredCpu), objectBoxes(1, 100000, CV_8UC1), rois(colored.size(), CV_8UC1);
+    cv::cuda::GpuMat colored(coloredCpu), objectBoxes(1, 100000, CV_8UC1), rois(colored.size(), CV_8UC1);
     rois.setTo(cv::Scalar::all(1));
 
-    cv::gpu::Stream s;
+    cv::cuda::Stream s;
 
     objectBoxes.setTo(0);
     cascade.detect(colored, rois, objectBoxes, s);
diff --git a/modules/softcascade/test/utility.cpp b/modules/softcascade/test/utility.cpp
index b308f416c5..47b3cf1c6e 100644
--- a/modules/softcascade/test/utility.cpp
+++ b/modules/softcascade/test/utility.cpp
@@ -46,7 +46,7 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cvtest;
 using namespace testing;
 using namespace testing::internal;
diff --git a/modules/softcascade/test/utility.hpp b/modules/softcascade/test/utility.hpp
index 44b0fabf29..f4fc9e80c1 100644
--- a/modules/softcascade/test/utility.hpp
+++ b/modules/softcascade/test/utility.hpp
@@ -49,7 +49,7 @@
 //////////////////////////////////////////////////////////////////////
 // Gpu devices
 //! return true if device supports specified feature and gpu module was built with support the feature.
-bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
+bool supportFeature(const cv::cuda::DeviceInfo& info, cv::cuda::FeatureSet feature);
 
 
 #if defined(HAVE_CUDA)
@@ -61,15 +61,15 @@ public:
     void load(int i);
     void loadAll();
 
-    const std::vector<cv::gpu::DeviceInfo>& values() const { return devices_; }
+    const std::vector<cv::cuda::DeviceInfo>& values() const { return devices_; }
 
 private:
-    std::vector<cv::gpu::DeviceInfo> devices_;
+    std::vector<cv::cuda::DeviceInfo> devices_;
     DeviceManager() {loadAll();}
 };
 # define ALL_DEVICES testing::ValuesIn(DeviceManager::instance().values())
 #else
-# define ALL_DEVICES testing::ValuesIn(std::vector<cv::gpu::DeviceInfo>())
+# define ALL_DEVICES testing::ValuesIn(std::vector<cv::cuda::DeviceInfo>())
 #endif
 
 #endif // __OPENCV_GPU_TEST_UTILITY_HPP__
diff --git a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp
index 0b24e784f5..d1022e93e0 100644
--- a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp
+++ b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp
@@ -116,11 +116,11 @@ public:
 private:
     void find(const Mat &image, ImageFeatures &features);
 
-    gpu::GpuMat image_;
-    gpu::GpuMat gray_image_;
-    gpu::SURF_GPU surf_;
-    gpu::GpuMat keypoints_;
-    gpu::GpuMat descriptors_;
+    cuda::GpuMat image_;
+    cuda::GpuMat gray_image_;
+    cuda::SURF_GPU surf_;
+    cuda::GpuMat keypoints_;
+    cuda::GpuMat descriptors_;
     int num_octaves_, num_layers_;
     int num_octaves_descr_, num_layers_descr_;
 };
diff --git a/modules/stitching/include/opencv2/stitching/detail/warpers.hpp b/modules/stitching/include/opencv2/stitching/detail/warpers.hpp
index 5cd8f5141c..ff7d7a29b4 100644
--- a/modules/stitching/include/opencv2/stitching/detail/warpers.hpp
+++ b/modules/stitching/include/opencv2/stitching/detail/warpers.hpp
@@ -367,18 +367,18 @@ public:
         return result;
     }
 
-    Rect buildMaps(Size src_size, const Mat &K, const Mat &R, gpu::GpuMat &xmap, gpu::GpuMat &ymap);
+    Rect buildMaps(Size src_size, const Mat &K, const Mat &R, cuda::GpuMat &xmap, cuda::GpuMat &ymap);
 
-    Rect buildMaps(Size src_size, const Mat &K, const Mat &R, const Mat &T, gpu::GpuMat &xmap, gpu::GpuMat &ymap);
+    Rect buildMaps(Size src_size, const Mat &K, const Mat &R, const Mat &T, cuda::GpuMat &xmap, cuda::GpuMat &ymap);
 
-    Point warp(const gpu::GpuMat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode,
-               gpu::GpuMat &dst);
+    Point warp(const cuda::GpuMat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode,
+               cuda::GpuMat &dst);
 
-    Point warp(const gpu::GpuMat &src, const Mat &K, const Mat &R, const Mat &T, int interp_mode, int border_mode,
-               gpu::GpuMat &dst);
+    Point warp(const cuda::GpuMat &src, const Mat &K, const Mat &R, const Mat &T, int interp_mode, int border_mode,
+               cuda::GpuMat &dst);
 
 private:
-    gpu::GpuMat d_xmap_, d_ymap_, d_src_, d_dst_;
+    cuda::GpuMat d_xmap_, d_ymap_, d_src_, d_dst_;
 };
 
 
@@ -404,13 +404,13 @@ public:
         return result;
     }
 
-    Rect buildMaps(Size src_size, const Mat &K, const Mat &R, gpu::GpuMat &xmap, gpu::GpuMat &ymap);
+    Rect buildMaps(Size src_size, const Mat &K, const Mat &R, cuda::GpuMat &xmap, cuda::GpuMat &ymap);
 
-    Point warp(const gpu::GpuMat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode,
-               gpu::GpuMat &dst);
+    Point warp(const cuda::GpuMat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode,
+               cuda::GpuMat &dst);
 
 private:
-    gpu::GpuMat d_xmap_, d_ymap_, d_src_, d_dst_;
+    cuda::GpuMat d_xmap_, d_ymap_, d_src_, d_dst_;
 };
 
 
@@ -436,13 +436,13 @@ public:
         return result;
     }
 
-    Rect buildMaps(Size src_size, const Mat &K, const Mat &R, gpu::GpuMat &xmap, gpu::GpuMat &ymap);
+    Rect buildMaps(Size src_size, const Mat &K, const Mat &R, cuda::GpuMat &xmap, cuda::GpuMat &ymap);
 
-    Point warp(const gpu::GpuMat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode,
-               gpu::GpuMat &dst);
+    Point warp(const cuda::GpuMat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode,
+               cuda::GpuMat &dst);
 
 private:
-    gpu::GpuMat d_xmap_, d_ymap_, d_src_, d_dst_;
+    cuda::GpuMat d_xmap_, d_ymap_, d_src_, d_dst_;
 };
 #endif
 
diff --git a/modules/stitching/src/blenders.cpp b/modules/stitching/src/blenders.cpp
index 0043b7348a..6ac37d1f9d 100644
--- a/modules/stitching/src/blenders.cpp
+++ b/modules/stitching/src/blenders.cpp
@@ -189,7 +189,7 @@ MultiBandBlender::MultiBandBlender(int try_gpu, int num_bands, int weight_type)
     setNumBands(num_bands);
 
 #if defined(HAVE_OPENCV_GPUARITHM) && defined(HAVE_OPENCV_GPUWARPING)
-    can_use_gpu_ = try_gpu && gpu::getCudaEnabledDeviceCount();
+    can_use_gpu_ = try_gpu && cuda::getCudaEnabledDeviceCount();
 #else
     (void) try_gpu;
     can_use_gpu_ = false;
@@ -494,16 +494,16 @@ void createLaplacePyrGpu(const Mat &img, int num_levels, std::vector<Mat> &pyr)
 #if defined(HAVE_OPENCV_GPUARITHM) && defined(HAVE_OPENCV_GPUWARPING)
     pyr.resize(num_levels + 1);
 
-    std::vector<gpu::GpuMat> gpu_pyr(num_levels + 1);
+    std::vector<cuda::GpuMat> gpu_pyr(num_levels + 1);
     gpu_pyr[0].upload(img);
     for (int i = 0; i < num_levels; ++i)
-        gpu::pyrDown(gpu_pyr[i], gpu_pyr[i + 1]);
+        cuda::pyrDown(gpu_pyr[i], gpu_pyr[i + 1]);
 
-    gpu::GpuMat tmp;
+    cuda::GpuMat tmp;
     for (int i = 0; i < num_levels; ++i)
     {
-        gpu::pyrUp(gpu_pyr[i + 1], tmp);
-        gpu::subtract(gpu_pyr[i], tmp, gpu_pyr[i]);
+        cuda::pyrUp(gpu_pyr[i + 1], tmp);
+        cuda::subtract(gpu_pyr[i], tmp, gpu_pyr[i]);
         gpu_pyr[i].download(pyr[i]);
     }
 
@@ -535,15 +535,15 @@ void restoreImageFromLaplacePyrGpu(std::vector<Mat> &pyr)
     if (pyr.empty())
         return;
 
-    std::vector<gpu::GpuMat> gpu_pyr(pyr.size());
+    std::vector<cuda::GpuMat> gpu_pyr(pyr.size());
     for (size_t i = 0; i < pyr.size(); ++i)
         gpu_pyr[i].upload(pyr[i]);
 
-    gpu::GpuMat tmp;
+    cuda::GpuMat tmp;
     for (size_t i = pyr.size() - 1; i > 0; --i)
     {
-        gpu::pyrUp(gpu_pyr[i], tmp);
-        gpu::add(tmp, gpu_pyr[i - 1], gpu_pyr[i - 1]);
+        cuda::pyrUp(gpu_pyr[i], tmp);
+        cuda::add(tmp, gpu_pyr[i - 1], gpu_pyr[i - 1]);
     }
 
     gpu_pyr[0].download(pyr[0]);
diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp
index e8b7bf24f9..aee004e8de 100644
--- a/modules/stitching/src/matchers.cpp
+++ b/modules/stitching/src/matchers.cpp
@@ -44,7 +44,7 @@
 
 using namespace cv;
 using namespace cv::detail;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 #ifdef HAVE_OPENCV_NONFREE
 #include "opencv2/nonfree.hpp"
diff --git a/modules/stitching/src/seam_finders.cpp b/modules/stitching/src/seam_finders.cpp
index c453b5feb3..2c37421f76 100644
--- a/modules/stitching/src/seam_finders.cpp
+++ b/modules/stitching/src/seam_finders.cpp
@@ -1423,14 +1423,14 @@ void GraphCutSeamFinderGpu::findInPair(size_t first, size_t second, Rect roi)
         CV_Error(Error::StsBadArg, "unsupported pixel similarity measure");
     }
 
-    gpu::GpuMat terminals_d(terminals);
-    gpu::GpuMat leftT_d(leftT);
-    gpu::GpuMat rightT_d(rightT);
-    gpu::GpuMat top_d(top);
-    gpu::GpuMat bottom_d(bottom);
-    gpu::GpuMat labels_d, buf_d;
-
-    gpu::graphcut(terminals_d, leftT_d, rightT_d, top_d, bottom_d, labels_d, buf_d);
+    cuda::GpuMat terminals_d(terminals);
+    cuda::GpuMat leftT_d(leftT);
+    cuda::GpuMat rightT_d(rightT);
+    cuda::GpuMat top_d(top);
+    cuda::GpuMat bottom_d(bottom);
+    cuda::GpuMat labels_d, buf_d;
+
+    cuda::graphcut(terminals_d, leftT_d, rightT_d, top_d, bottom_d, labels_d, buf_d);
 
     Mat_<uchar> labels = (Mat)labels_d;
     for (int y = 0; y < roi.height; ++y)
diff --git a/modules/stitching/src/stitcher.cpp b/modules/stitching/src/stitcher.cpp
index 75307209db..f67052c726 100644
--- a/modules/stitching/src/stitcher.cpp
+++ b/modules/stitching/src/stitcher.cpp
@@ -57,7 +57,7 @@ Stitcher Stitcher::createDefault(bool try_use_gpu)
     stitcher.setBundleAdjuster(new detail::BundleAdjusterRay());
 
 #ifdef HAVE_OPENCV_GPU
-    if (try_use_gpu && gpu::getCudaEnabledDeviceCount() > 0)
+    if (try_use_gpu && cuda::getCudaEnabledDeviceCount() > 0)
     {
 #ifdef HAVE_OPENCV_NONFREE
         stitcher.setFeaturesFinder(new detail::SurfFeaturesFinderGpu());
diff --git a/modules/stitching/src/warpers.cpp b/modules/stitching/src/warpers.cpp
index 1082b42d75..e09790f00e 100644
--- a/modules/stitching/src/warpers.cpp
+++ b/modules/stitching/src/warpers.cpp
@@ -211,85 +211,85 @@ void SphericalWarper::detectResultRoi(Size src_size, Point &dst_tl, Point &dst_b
 
 
 #ifdef HAVE_OPENCV_GPUWARPING
-Rect PlaneWarperGpu::buildMaps(Size src_size, const Mat &K, const Mat &R, gpu::GpuMat &xmap, gpu::GpuMat &ymap)
+Rect PlaneWarperGpu::buildMaps(Size src_size, const Mat &K, const Mat &R, cuda::GpuMat &xmap, cuda::GpuMat &ymap)
 {
     return buildMaps(src_size, K, R, Mat::zeros(3, 1, CV_32F), xmap, ymap);
 }
 
-Rect PlaneWarperGpu::buildMaps(Size src_size, const Mat &K, const Mat &R, const Mat &T, gpu::GpuMat &xmap, gpu::GpuMat &ymap)
+Rect PlaneWarperGpu::buildMaps(Size src_size, const Mat &K, const Mat &R, const Mat &T, cuda::GpuMat &xmap, cuda::GpuMat &ymap)
 {
     projector_.setCameraParams(K, R, T);
 
     Point dst_tl, dst_br;
     detectResultRoi(src_size, dst_tl, dst_br);
 
-    gpu::buildWarpPlaneMaps(src_size, Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)),
+    cuda::buildWarpPlaneMaps(src_size, Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)),
                             K, R, T, projector_.scale, xmap, ymap);
 
     return Rect(dst_tl, dst_br);
 }
 
-Point PlaneWarperGpu::warp(const gpu::GpuMat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode,
-                           gpu::GpuMat &dst)
+Point PlaneWarperGpu::warp(const cuda::GpuMat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode,
+                           cuda::GpuMat &dst)
 {
     return warp(src, K, R, Mat::zeros(3, 1, CV_32F), interp_mode, border_mode, dst);
 }
 
 
-Point PlaneWarperGpu::warp(const gpu::GpuMat &src, const Mat &K, const Mat &R, const Mat &T, int interp_mode, int border_mode,
-                           gpu::GpuMat &dst)
+Point PlaneWarperGpu::warp(const cuda::GpuMat &src, const Mat &K, const Mat &R, const Mat &T, int interp_mode, int border_mode,
+                           cuda::GpuMat &dst)
 {
     Rect dst_roi = buildMaps(src.size(), K, R, T, d_xmap_, d_ymap_);
     dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
-    gpu::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
+    cuda::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
     return dst_roi.tl();
 }
 
 
-Rect SphericalWarperGpu::buildMaps(Size src_size, const Mat &K, const Mat &R, gpu::GpuMat &xmap, gpu::GpuMat &ymap)
+Rect SphericalWarperGpu::buildMaps(Size src_size, const Mat &K, const Mat &R, cuda::GpuMat &xmap, cuda::GpuMat &ymap)
 {
     projector_.setCameraParams(K, R);
 
     Point dst_tl, dst_br;
     detectResultRoi(src_size, dst_tl, dst_br);
 
-    gpu::buildWarpSphericalMaps(src_size, Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)),
+    cuda::buildWarpSphericalMaps(src_size, Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)),
                                 K, R, projector_.scale, xmap, ymap);
 
     return Rect(dst_tl, dst_br);
 }
 
 
-Point SphericalWarperGpu::warp(const gpu::GpuMat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode,
-                               gpu::GpuMat &dst)
+Point SphericalWarperGpu::warp(const cuda::GpuMat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode,
+                               cuda::GpuMat &dst)
 {
     Rect dst_roi = buildMaps(src.size(), K, R, d_xmap_, d_ymap_);
     dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
-    gpu::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
+    cuda::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
     return dst_roi.tl();
 }
 
 
-Rect CylindricalWarperGpu::buildMaps(Size src_size, const Mat &K, const Mat &R, gpu::GpuMat &xmap, gpu::GpuMat &ymap)
+Rect CylindricalWarperGpu::buildMaps(Size src_size, const Mat &K, const Mat &R, cuda::GpuMat &xmap, cuda::GpuMat &ymap)
 {
     projector_.setCameraParams(K, R);
 
     Point dst_tl, dst_br;
     detectResultRoi(src_size, dst_tl, dst_br);
 
-    gpu::buildWarpCylindricalMaps(src_size, Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)),
+    cuda::buildWarpCylindricalMaps(src_size, Rect(dst_tl, Point(dst_br.x + 1, dst_br.y + 1)),
                                   K, R, projector_.scale, xmap, ymap);
 
     return Rect(dst_tl, dst_br);
 }
 
 
-Point CylindricalWarperGpu::warp(const gpu::GpuMat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode,
-                                 gpu::GpuMat &dst)
+Point CylindricalWarperGpu::warp(const cuda::GpuMat &src, const Mat &K, const Mat &R, int interp_mode, int border_mode,
+                                 cuda::GpuMat &dst)
 {
     Rect dst_roi = buildMaps(src.size(), K, R, d_xmap_, d_ymap_);
     dst.create(dst_roi.height + 1, dst_roi.width + 1, src.type());
-    gpu::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
+    cuda::remap(src, dst, d_xmap_, d_ymap_, interp_mode, border_mode);
     return dst_roi.tl();
 }
 #endif
diff --git a/modules/superres/perf/perf_superres.cpp b/modules/superres/perf/perf_superres.cpp
index 83fb76e8af..4349af9d34 100644
--- a/modules/superres/perf/perf_superres.cpp
+++ b/modules/superres/perf/perf_superres.cpp
@@ -48,7 +48,7 @@ using namespace testing;
 using namespace perf;
 using namespace cv;
 using namespace cv::superres;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 namespace
 {
diff --git a/modules/superres/src/btv_l1_gpu.cpp b/modules/superres/src/btv_l1_gpu.cpp
index 7b2ad73700..677a42a44f 100644
--- a/modules/superres/src/btv_l1_gpu.cpp
+++ b/modules/superres/src/btv_l1_gpu.cpp
@@ -46,7 +46,7 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::superres;
 using namespace cv::superres::detail;
 
@@ -98,20 +98,20 @@ namespace
 
         for (int i = baseIdx - 1; i >= 0; --i)
         {
-            gpu::add(relForwardMotions[i + 1].first, forwardMotions[i].first, relForwardMotions[i].first);
-            gpu::add(relForwardMotions[i + 1].second, forwardMotions[i].second, relForwardMotions[i].second);
+            cuda::add(relForwardMotions[i + 1].first, forwardMotions[i].first, relForwardMotions[i].first);
+            cuda::add(relForwardMotions[i + 1].second, forwardMotions[i].second, relForwardMotions[i].second);
 
-            gpu::add(relBackwardMotions[i + 1].first, backwardMotions[i + 1].first, relBackwardMotions[i].first);
-            gpu::add(relBackwardMotions[i + 1].second, backwardMotions[i + 1].second, relBackwardMotions[i].second);
+            cuda::add(relBackwardMotions[i + 1].first, backwardMotions[i + 1].first, relBackwardMotions[i].first);
+            cuda::add(relBackwardMotions[i + 1].second, backwardMotions[i + 1].second, relBackwardMotions[i].second);
         }
 
         for (int i = baseIdx + 1; i < count; ++i)
         {
-            gpu::add(relForwardMotions[i - 1].first, backwardMotions[i].first, relForwardMotions[i].first);
-            gpu::add(relForwardMotions[i - 1].second, backwardMotions[i].second, relForwardMotions[i].second);
+            cuda::add(relForwardMotions[i - 1].first, backwardMotions[i].first, relForwardMotions[i].first);
+            cuda::add(relForwardMotions[i - 1].second, backwardMotions[i].second, relForwardMotions[i].second);
 
-            gpu::add(relBackwardMotions[i - 1].first, forwardMotions[i - 1].first, relBackwardMotions[i].first);
-            gpu::add(relBackwardMotions[i - 1].second, forwardMotions[i - 1].second, relBackwardMotions[i].second);
+            cuda::add(relBackwardMotions[i - 1].first, forwardMotions[i - 1].first, relBackwardMotions[i].first);
+            cuda::add(relBackwardMotions[i - 1].second, forwardMotions[i - 1].second, relBackwardMotions[i].second);
         }
     }
 
@@ -121,11 +121,11 @@ namespace
 
         for (size_t i = 0; i < lowResMotions.size(); ++i)
         {
-            gpu::resize(lowResMotions[i].first, highResMotions[i].first, Size(), scale, scale, INTER_CUBIC);
-            gpu::resize(lowResMotions[i].second, highResMotions[i].second, Size(), scale, scale, INTER_CUBIC);
+            cuda::resize(lowResMotions[i].first, highResMotions[i].first, Size(), scale, scale, INTER_CUBIC);
+            cuda::resize(lowResMotions[i].second, highResMotions[i].second, Size(), scale, scale, INTER_CUBIC);
 
-            gpu::multiply(highResMotions[i].first, Scalar::all(scale), highResMotions[i].first);
-            gpu::multiply(highResMotions[i].second, Scalar::all(scale), highResMotions[i].second);
+            cuda::multiply(highResMotions[i].first, Scalar::all(scale), highResMotions[i].first);
+            cuda::multiply(highResMotions[i].second, Scalar::all(scale), highResMotions[i].second);
         }
     }
 
@@ -230,7 +230,7 @@ namespace
         Ptr<DenseOpticalFlowExt> opticalFlow_;
 
     private:
-        std::vector<Ptr<gpu::Filter> > filters_;
+        std::vector<Ptr<cuda::Filter> > filters_;
         int curBlurKernelSize_;
         double curBlurSigma_;
         int curSrcType_;
@@ -299,7 +299,7 @@ namespace
         {
             filters_.resize(src.size());
             for (size_t i = 0; i < src.size(); ++i)
-                filters_[i] = gpu::createGaussianFilter(src[0].type(), -1, Size(blurKernelSize_, blurKernelSize_), blurSigma_);
+                filters_[i] = cuda::createGaussianFilter(src[0].type(), -1, Size(blurKernelSize_, blurKernelSize_), blurSigma_);
             curBlurKernelSize_ = blurKernelSize_;
             curBlurSigma_ = blurSigma_;
             curSrcType_ = src[0].type();
@@ -329,7 +329,7 @@ namespace
         const Size lowResSize = src[0].size();
         const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_);
 
-        gpu::resize(src[baseIdx], highRes_, highResSize, 0, 0, INTER_CUBIC);
+        cuda::resize(src[baseIdx], highRes_, highResSize, 0, 0, INTER_CUBIC);
 
         // iterations
 
@@ -344,11 +344,11 @@ namespace
             for (size_t k = 0; k < src.size(); ++k)
             {
                 // a = M * Ih
-                gpu::remap(highRes_, a_[k], backwardMaps_[k].first, backwardMaps_[k].second, INTER_NEAREST, BORDER_REPLICATE, Scalar(), streams_[k]);
+                cuda::remap(highRes_, a_[k], backwardMaps_[k].first, backwardMaps_[k].second, INTER_NEAREST, BORDER_REPLICATE, Scalar(), streams_[k]);
                 // b = HM * Ih
                 filters_[k]->apply(a_[k], b_[k], streams_[k]);
                 // c = DHF * Ih
-                gpu::resize(b_[k], c_[k], lowResSize, 0, 0, INTER_NEAREST, streams_[k]);
+                cuda::resize(b_[k], c_[k], lowResSize, 0, 0, INTER_NEAREST, streams_[k]);
 
                 diffSign(src[k], c_[k], c_[k], streams_[k]);
 
@@ -357,19 +357,19 @@ namespace
                 // b = HtDt * diff
                 filters_[k]->apply(a_[k], b_[k], streams_[k]);
                 // diffTerm = MtHtDt * diff
-                gpu::remap(b_[k], diffTerms_[k], forwardMaps_[k].first, forwardMaps_[k].second, INTER_NEAREST, BORDER_REPLICATE, Scalar(), streams_[k]);
+                cuda::remap(b_[k], diffTerms_[k], forwardMaps_[k].first, forwardMaps_[k].second, INTER_NEAREST, BORDER_REPLICATE, Scalar(), streams_[k]);
             }
 
             if (lambda_ > 0)
             {
                 calcBtvRegularization(highRes_, regTerm_, btvKernelSize_);
-                gpu::addWeighted(highRes_, 1.0, regTerm_, -tau_ * lambda_, 0.0, highRes_);
+                cuda::addWeighted(highRes_, 1.0, regTerm_, -tau_ * lambda_, 0.0, highRes_);
             }
 
             for (size_t k = 0; k < src.size(); ++k)
             {
                 streams_[k].waitForCompletion();
-                gpu::addWeighted(highRes_, 1.0, diffTerms_[k], tau_, 0.0, highRes_);
+                cuda::addWeighted(highRes_, 1.0, diffTerms_[k], tau_, 0.0, highRes_);
             }
         }
 
diff --git a/modules/superres/src/cuda/btv_l1_gpu.cu b/modules/superres/src/cuda/btv_l1_gpu.cu
index 22b3e0abf5..3ea29481dd 100644
--- a/modules/superres/src/cuda/btv_l1_gpu.cu
+++ b/modules/superres/src/cuda/btv_l1_gpu.cu
@@ -49,8 +49,8 @@
 #include "opencv2/core/cuda/vec_traits.hpp"
 #include "opencv2/core/cuda/vec_math.hpp"
 
-using namespace cv::gpu;
-using namespace cv::gpu::cudev;
+using namespace cv::cuda;
+using namespace cv::cuda::cudev;
 
 namespace btv_l1_cudev
 {
@@ -173,7 +173,7 @@ namespace btv_l1_cudev
     };
 }
 
-namespace cv { namespace gpu { namespace cudev
+namespace cv { namespace cuda { namespace cudev
 {
     template <> struct TransformFunctorTraits<btv_l1_cudev::DiffSign> : DefaultTransformFunctorTraits<btv_l1_cudev::DiffSign>
     {
diff --git a/modules/superres/src/frame_source.cpp b/modules/superres/src/frame_source.cpp
index 5c6b1231b9..3b366aff96 100644
--- a/modules/superres/src/frame_source.cpp
+++ b/modules/superres/src/frame_source.cpp
@@ -42,7 +42,7 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::superres;
 using namespace cv::superres::detail;
 
diff --git a/modules/superres/src/input_array_utility.cpp b/modules/superres/src/input_array_utility.cpp
index 8a3cbca55d..2027eb61c9 100644
--- a/modules/superres/src/input_array_utility.cpp
+++ b/modules/superres/src/input_array_utility.cpp
@@ -43,7 +43,7 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 Mat cv::superres::arrGetMat(InputArray arr, Mat& buf)
 {
@@ -191,7 +191,7 @@ namespace
         {
         case _InputArray::GPU_MAT:
             #ifdef HAVE_OPENCV_GPUIMGPROC
-                gpu::cvtColor(src.getGpuMat(), dst.getGpuMatRef(), code, cn);
+                cuda::cvtColor(src.getGpuMat(), dst.getGpuMatRef(), code, cn);
             #else
                 CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform");
             #endif
diff --git a/modules/superres/src/input_array_utility.hpp b/modules/superres/src/input_array_utility.hpp
index 0c86143fc6..367df614ef 100644
--- a/modules/superres/src/input_array_utility.hpp
+++ b/modules/superres/src/input_array_utility.hpp
@@ -54,12 +54,12 @@ namespace cv
     namespace superres
     {
         CV_EXPORTS Mat arrGetMat(InputArray arr, Mat& buf);
-        CV_EXPORTS gpu::GpuMat arrGetGpuMat(InputArray arr, gpu::GpuMat& buf);
+        CV_EXPORTS cuda::GpuMat arrGetGpuMat(InputArray arr, cuda::GpuMat& buf);
 
         CV_EXPORTS void arrCopy(InputArray src, OutputArray dst);
 
         CV_EXPORTS Mat convertToType(const Mat& src, int type, Mat& buf0, Mat& buf1);
-        CV_EXPORTS gpu::GpuMat convertToType(const gpu::GpuMat& src, int type, gpu::GpuMat& buf0, gpu::GpuMat& buf1);
+        CV_EXPORTS cuda::GpuMat convertToType(const cuda::GpuMat& src, int type, cuda::GpuMat& buf0, cuda::GpuMat& buf1);
 
 #ifdef HAVE_OPENCV_OCL
         CV_EXPORTS ocl::oclMat convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1);
diff --git a/modules/superres/src/optical_flow.cpp b/modules/superres/src/optical_flow.cpp
index 1779498775..e33c54948b 100644
--- a/modules/superres/src/optical_flow.cpp
+++ b/modules/superres/src/optical_flow.cpp
@@ -43,7 +43,7 @@
 #include "precomp.hpp"
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cv::superres;
 using namespace cv::superres::detail;
 
diff --git a/modules/ts/include/opencv2/ts/gpu_test.hpp b/modules/ts/include/opencv2/ts/gpu_test.hpp
index 2ff2ca0bfc..83a94b8fb7 100644
--- a/modules/ts/include/opencv2/ts/gpu_test.hpp
+++ b/modules/ts/include/opencv2/ts/gpu_test.hpp
@@ -65,8 +65,8 @@ namespace cvtest
     //////////////////////////////////////////////////////////////////////
     // GpuMat create
 
-    CV_EXPORTS cv::gpu::GpuMat createMat(cv::Size size, int type, bool useRoi = false);
-    CV_EXPORTS cv::gpu::GpuMat loadMat(const cv::Mat& m, bool useRoi = false);
+    CV_EXPORTS cv::cuda::GpuMat createMat(cv::Size size, int type, bool useRoi = false);
+    CV_EXPORTS cv::cuda::GpuMat loadMat(const cv::Mat& m, bool useRoi = false);
 
     //////////////////////////////////////////////////////////////////////
     // Image load
@@ -81,7 +81,7 @@ namespace cvtest
     // Gpu devices
 
     //! return true if device supports specified feature and gpu module was built with support the feature.
-    CV_EXPORTS bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
+    CV_EXPORTS bool supportFeature(const cv::cuda::DeviceInfo& info, cv::cuda::FeatureSet feature);
 
     class CV_EXPORTS DeviceManager
     {
@@ -91,10 +91,10 @@ namespace cvtest
         void load(int i);
         void loadAll();
 
-        const std::vector<cv::gpu::DeviceInfo>& values() const { return devices_; }
+        const std::vector<cv::cuda::DeviceInfo>& values() const { return devices_; }
 
     private:
-        std::vector<cv::gpu::DeviceInfo> devices_;
+        std::vector<cv::cuda::DeviceInfo> devices_;
     };
 
     #define ALL_DEVICES testing::ValuesIn(cvtest::DeviceManager::instance().values())
@@ -201,7 +201,7 @@ namespace cvtest
         } \
         catch (...) \
         { \
-          cv::gpu::resetDevice(); \
+          cv::cuda::resetDevice(); \
           throw; \
         } \
       } \
@@ -342,7 +342,7 @@ namespace cvtest
     CV_EXPORTS void printCudaInfo();
 }
 
-namespace cv { namespace gpu
+namespace cv { namespace cuda
 {
     CV_EXPORTS void PrintTo(const DeviceInfo& info, std::ostream* os);
 }}
@@ -378,7 +378,7 @@ namespace cv { namespace gpu
             else \
             { \
                 cvtest::DeviceManager::instance().load(device); \
-                cv::gpu::DeviceInfo info(device); \
+                cv::cuda::DeviceInfo info(device); \
                 std::cout << "Run tests on device " << device << " [" << info.name() << "] \n" << std::endl; \
             } \
             cvtest::TS::ptr()->init( resourcesubdir ); \
diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp
index 74ea47e13b..806003173d 100644
--- a/modules/ts/include/opencv2/ts/ts_perf.hpp
+++ b/modules/ts/include/opencv2/ts/ts_perf.hpp
@@ -22,7 +22,7 @@
 #endif
 
 // declare major namespaces to avoid errors on unknown namespace
-namespace cv { namespace gpu {} namespace ocl {} }
+namespace cv { namespace cuda {} namespace ocl {} }
 
 namespace perf
 {
@@ -96,7 +96,7 @@ private:
         class_name(int val = 0) : val_(val) {}                                          \
         operator int() const { return val_; }                                           \
         void PrintTo(std::ostream* os) const {                                          \
-            using namespace cv;using namespace cv::gpu; using namespace cv::ocl;        \
+            using namespace cv;using namespace cv::cuda; using namespace cv::ocl;        \
             const int vals[] = { __VA_ARGS__ };                                         \
             const char* svals = #__VA_ARGS__;                                           \
             for(int i = 0, pos = 0; i < (int)(sizeof(vals)/sizeof(int)); ++i) {         \
@@ -112,7 +112,7 @@ private:
             *os << "UNKNOWN";                                                           \
         }                                                                               \
         static ::testing::internal::ParamGenerator<class_name> all() {                  \
-            using namespace cv;using namespace cv::gpu; using namespace cv::ocl;        \
+            using namespace cv;using namespace cv::cuda; using namespace cv::ocl;        \
             static class_name vals[] = { __VA_ARGS__ };                                 \
             return ::testing::ValuesIn(vals);                                           \
         }                                                                               \
@@ -126,7 +126,7 @@ private:
         class_name(int val = 0) : val_(val) {}                                          \
         operator int() const { return val_; }                                           \
         void PrintTo(std::ostream* os) const {                                          \
-            using namespace cv;using namespace cv::gpu; using namespace cv::ocl;        \
+            using namespace cv;using namespace cv::cuda; using namespace cv::ocl;        \
             const int vals[] = { __VA_ARGS__ };                                         \
             const char* svals = #__VA_ARGS__;                                           \
             int value = val_;                                                           \
diff --git a/modules/ts/src/gpu_perf.cpp b/modules/ts/src/gpu_perf.cpp
index 2bca535c46..3a6fd2b1bc 100644
--- a/modules/ts/src/gpu_perf.cpp
+++ b/modules/ts/src/gpu_perf.cpp
@@ -278,13 +278,13 @@ namespace perf
         printf("[----------]\n"), fflush(stdout);
 
         printf("[----------]\n"), fflush(stdout);
-        int deviceCount = cv::gpu::getCudaEnabledDeviceCount();
+        int deviceCount = cv::cuda::getCudaEnabledDeviceCount();
         printf("[ GPU INFO ] \tCUDA device count:: %d.\n", deviceCount), fflush(stdout);
         printf("[----------]\n"), fflush(stdout);
 
         for (int i = 0; i < deviceCount; ++i)
         {
-            cv::gpu::DeviceInfo info(i);
+            cv::cuda::DeviceInfo info(i);
 
             printf("[----------]\n"), fflush(stdout);
             printf("[ DEVICE   ] \t# %d %s.\n", i, info.name()), fflush(stdout);
diff --git a/modules/ts/src/gpu_test.cpp b/modules/ts/src/gpu_test.cpp
index 4847dbc383..9a7796629b 100644
--- a/modules/ts/src/gpu_test.cpp
+++ b/modules/ts/src/gpu_test.cpp
@@ -44,7 +44,7 @@
 #include <stdexcept>
 
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 using namespace cvtest;
 using namespace testing;
 using namespace testing::internal;
@@ -515,7 +515,7 @@ namespace cvtest
 }
 
 
-void cv::gpu::PrintTo(const DeviceInfo& info, std::ostream* os)
+void cv::cuda::PrintTo(const DeviceInfo& info, std::ostream* os)
 {
     (*os) << info.name();
 }
diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp
index e3ae8735d0..ba9c07643e 100644
--- a/modules/ts/src/ts_perf.cpp
+++ b/modules/ts/src/ts_perf.cpp
@@ -708,18 +708,18 @@ void TestBase::Init(const std::vector<std::string> & availableImpls,
 
 #ifdef HAVE_CUDA
 
-    param_cuda_device      = std::max(0, std::min(cv::gpu::getCudaEnabledDeviceCount(), args.get<int>("perf_cuda_device")));
+    param_cuda_device      = std::max(0, std::min(cv::cuda::getCudaEnabledDeviceCount(), args.get<int>("perf_cuda_device")));
 
     if (param_impl == "cuda")
     {
-        cv::gpu::DeviceInfo info(param_cuda_device);
+        cv::cuda::DeviceInfo info(param_cuda_device);
         if (!info.isCompatible())
         {
             printf("[----------]\n[ FAILURE  ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name()), fflush(stdout);
             exit(-1);
         }
 
-        cv::gpu::setDevice(param_cuda_device);
+        cv::cuda::setDevice(param_cuda_device);
 
         printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name()), fflush(stdout);
     }
@@ -744,7 +744,7 @@ void TestBase::RecordRunParameters()
 #ifdef HAVE_CUDA
     if (param_impl == "cuda")
     {
-        cv::gpu::DeviceInfo info(param_cuda_device);
+        cv::cuda::DeviceInfo info(param_cuda_device);
         ::testing::Test::RecordProperty("cv_cuda_gpu", info.name());
     }
 #endif
@@ -1203,7 +1203,7 @@ void TestBase::RunPerfTestBody()
         metrics.terminationReason = performance_metrics::TERM_EXCEPTION;
         #ifdef HAVE_CUDA
             if (e.code == cv::Error::GpuApiCallError)
-                cv::gpu::resetDevice();
+                cv::cuda::resetDevice();
         #endif
         FAIL() << "Expected: PerfTestBody() doesn't throw an exception.\n  Actual: it throws cv::Exception:\n  " << e.what();
     }
diff --git a/modules/videostab/include/opencv2/videostab/global_motion.hpp b/modules/videostab/include/opencv2/videostab/global_motion.hpp
index 8ccc067a38..d7cb2746be 100644
--- a/modules/videostab/include/opencv2/videostab/global_motion.hpp
+++ b/modules/videostab/include/opencv2/videostab/global_motion.hpp
@@ -213,17 +213,17 @@ public:
     Ptr<IOutlierRejector> outlierRejector() const { return outlierRejector_; }
 
     virtual Mat estimate(const Mat &frame0, const Mat &frame1, bool *ok = 0);
-    Mat estimate(const gpu::GpuMat &frame0, const gpu::GpuMat &frame1, bool *ok = 0);
+    Mat estimate(const cuda::GpuMat &frame0, const cuda::GpuMat &frame1, bool *ok = 0);
 
 private:
     Ptr<MotionEstimatorBase> motionEstimator_;
-    Ptr<gpu::CornersDetector> detector_;
+    Ptr<cuda::CornersDetector> detector_;
     SparsePyrLkOptFlowEstimatorGpu optFlowEstimator_;
     Ptr<IOutlierRejector> outlierRejector_;
 
-    gpu::GpuMat frame0_, grayFrame0_, frame1_;
-    gpu::GpuMat pointsPrev_, points_;
-    gpu::GpuMat status_;
+    cuda::GpuMat frame0_, grayFrame0_, frame1_;
+    cuda::GpuMat pointsPrev_, points_;
+    cuda::GpuMat status_;
 
     Mat hostPointsPrev_, hostPoints_;
     std::vector<Point2f> hostPointsPrevTmp_, hostPointsTmp_;
diff --git a/modules/videostab/include/opencv2/videostab/optical_flow.hpp b/modules/videostab/include/opencv2/videostab/optical_flow.hpp
index 7509c1207a..fc4efbf5a3 100644
--- a/modules/videostab/include/opencv2/videostab/optical_flow.hpp
+++ b/modules/videostab/include/opencv2/videostab/optical_flow.hpp
@@ -111,15 +111,15 @@ public:
             InputArray frame0, InputArray frame1, InputArray points0, InputOutputArray points1,
             OutputArray status, OutputArray errors);
 
-    void run(const gpu::GpuMat &frame0, const gpu::GpuMat &frame1, const gpu::GpuMat &points0, gpu::GpuMat &points1,
-             gpu::GpuMat &status, gpu::GpuMat &errors);
+    void run(const cuda::GpuMat &frame0, const cuda::GpuMat &frame1, const cuda::GpuMat &points0, cuda::GpuMat &points1,
+             cuda::GpuMat &status, cuda::GpuMat &errors);
 
-    void run(const gpu::GpuMat &frame0, const gpu::GpuMat &frame1, const gpu::GpuMat &points0, gpu::GpuMat &points1,
-             gpu::GpuMat &status);
+    void run(const cuda::GpuMat &frame0, const cuda::GpuMat &frame1, const cuda::GpuMat &points0, cuda::GpuMat &points1,
+             cuda::GpuMat &status);
 
 private:
-    gpu::PyrLKOpticalFlow optFlowEstimator_;
-    gpu::GpuMat frame0_, frame1_, points0_, points1_, status_, errors_;
+    cuda::PyrLKOpticalFlow optFlowEstimator_;
+    cuda::GpuMat frame0_, frame1_, points0_, points1_, status_, errors_;
 };
 
 class CV_EXPORTS DensePyrLkOptFlowEstimatorGpu
@@ -133,8 +133,8 @@ public:
             OutputArray errors);
 
 private:
-    gpu::PyrLKOpticalFlow optFlowEstimator_;
-    gpu::GpuMat frame0_, frame1_, flowX_, flowY_, errors_;
+    cuda::PyrLKOpticalFlow optFlowEstimator_;
+    cuda::GpuMat frame0_, frame1_, flowX_, flowY_, errors_;
 };
 
 #endif
diff --git a/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp b/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp
index 01163b27c4..be6e782a35 100644
--- a/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp
+++ b/modules/videostab/include/opencv2/videostab/wobble_suppression.hpp
@@ -120,12 +120,12 @@ private:
 class CV_EXPORTS MoreAccurateMotionWobbleSuppressorGpu : public MoreAccurateMotionWobbleSuppressorBase
 {
 public:
-    void suppress(int idx, const gpu::GpuMat &frame, gpu::GpuMat &result);
+    void suppress(int idx, const cuda::GpuMat &frame, cuda::GpuMat &result);
     virtual void suppress(int idx, const Mat &frame, Mat &result);
 
 private:
-    gpu::GpuMat frameDevice_, resultDevice_;
-    gpu::GpuMat mapx_, mapy_;
+    cuda::GpuMat frameDevice_, resultDevice_;
+    cuda::GpuMat mapx_, mapy_;
 };
 #endif
 
diff --git a/modules/videostab/src/global_motion.cpp b/modules/videostab/src/global_motion.cpp
index 383a10dfd0..839e7c46d6 100644
--- a/modules/videostab/src/global_motion.cpp
+++ b/modules/videostab/src/global_motion.cpp
@@ -742,9 +742,9 @@ Mat KeypointBasedMotionEstimator::estimate(const Mat &frame0, const Mat &frame1,
 KeypointBasedMotionEstimatorGpu::KeypointBasedMotionEstimatorGpu(Ptr<MotionEstimatorBase> estimator)
     : ImageMotionEstimatorBase(estimator->motionModel()), motionEstimator_(estimator)
 {
-    detector_ = gpu::createGoodFeaturesToTrackDetector(CV_8UC1);
+    detector_ = cuda::createGoodFeaturesToTrackDetector(CV_8UC1);
 
-    CV_Assert(gpu::getCudaEnabledDeviceCount() > 0);
+    CV_Assert(cuda::getCudaEnabledDeviceCount() > 0);
     setOutlierRejector(new NullOutlierRejector());
 }
 
@@ -757,16 +757,16 @@ Mat KeypointBasedMotionEstimatorGpu::estimate(const Mat &frame0, const Mat &fram
 }
 
 
-Mat KeypointBasedMotionEstimatorGpu::estimate(const gpu::GpuMat &frame0, const gpu::GpuMat &frame1, bool *ok)
+Mat KeypointBasedMotionEstimatorGpu::estimate(const cuda::GpuMat &frame0, const cuda::GpuMat &frame1, bool *ok)
 {
     // convert frame to gray if it's color
 
-    gpu::GpuMat grayFrame0;
+    cuda::GpuMat grayFrame0;
     if (frame0.channels() == 1)
         grayFrame0 = frame0;
     else
     {
-        gpu::cvtColor(frame0, grayFrame0_, COLOR_BGR2GRAY);
+        cuda::cvtColor(frame0, grayFrame0_, COLOR_BGR2GRAY);
         grayFrame0 = grayFrame0_;
     }
 
@@ -777,7 +777,7 @@ Mat KeypointBasedMotionEstimatorGpu::estimate(const gpu::GpuMat &frame0, const g
     optFlowEstimator_.run(frame0, frame1, pointsPrev_, points_, status_);
 
     // leave good correspondences only
-    gpu::compactPoints(pointsPrev_, points_, status_);
+    cuda::compactPoints(pointsPrev_, points_, status_);
 
     pointsPrev_.download(hostPointsPrev_);
     points_.download(hostPoints_);
diff --git a/modules/videostab/src/optical_flow.cpp b/modules/videostab/src/optical_flow.cpp
index cee08823a1..922c1e2426 100644
--- a/modules/videostab/src/optical_flow.cpp
+++ b/modules/videostab/src/optical_flow.cpp
@@ -62,7 +62,7 @@ void SparsePyrLkOptFlowEstimator::run(
 
 SparsePyrLkOptFlowEstimatorGpu::SparsePyrLkOptFlowEstimatorGpu()
 {
-    CV_Assert(gpu::getCudaEnabledDeviceCount() > 0);
+    CV_Assert(cuda::getCudaEnabledDeviceCount() > 0);
 }
 
 
@@ -88,8 +88,8 @@ void SparsePyrLkOptFlowEstimatorGpu::run(
 
 
 void SparsePyrLkOptFlowEstimatorGpu::run(
-        const gpu::GpuMat &frame0, const gpu::GpuMat &frame1, const gpu::GpuMat &points0,
-        gpu::GpuMat &points1, gpu::GpuMat &status, gpu::GpuMat &errors)
+        const cuda::GpuMat &frame0, const cuda::GpuMat &frame1, const cuda::GpuMat &points0,
+        cuda::GpuMat &points1, cuda::GpuMat &status, cuda::GpuMat &errors)
 {
     optFlowEstimator_.winSize = winSize_;
     optFlowEstimator_.maxLevel = maxLevel_;
@@ -98,8 +98,8 @@ void SparsePyrLkOptFlowEstimatorGpu::run(
 
 
 void SparsePyrLkOptFlowEstimatorGpu::run(
-        const gpu::GpuMat &frame0, const gpu::GpuMat &frame1, const gpu::GpuMat &points0,
-        gpu::GpuMat &points1, gpu::GpuMat &status)
+        const cuda::GpuMat &frame0, const cuda::GpuMat &frame1, const cuda::GpuMat &points0,
+        cuda::GpuMat &points1, cuda::GpuMat &status)
 {
     optFlowEstimator_.winSize = winSize_;
     optFlowEstimator_.maxLevel = maxLevel_;
@@ -109,7 +109,7 @@ void SparsePyrLkOptFlowEstimatorGpu::run(
 
 DensePyrLkOptFlowEstimatorGpu::DensePyrLkOptFlowEstimatorGpu()
 {
-    CV_Assert(gpu::getCudaEnabledDeviceCount() > 0);
+    CV_Assert(cuda::getCudaEnabledDeviceCount() > 0);
 }
 
 
diff --git a/modules/videostab/src/wobble_suppression.cpp b/modules/videostab/src/wobble_suppression.cpp
index c9d8ac91f9..66bfcf5a44 100644
--- a/modules/videostab/src/wobble_suppression.cpp
+++ b/modules/videostab/src/wobble_suppression.cpp
@@ -123,7 +123,7 @@ void MoreAccurateMotionWobbleSuppressor::suppress(int idx, const Mat &frame, Mat
 
 
 #ifdef HAVE_OPENCV_GPUWARPING
-void MoreAccurateMotionWobbleSuppressorGpu::suppress(int idx, const gpu::GpuMat &frame, gpu::GpuMat &result)
+void MoreAccurateMotionWobbleSuppressorGpu::suppress(int idx, const cuda::GpuMat &frame, cuda::GpuMat &result)
 {
     CV_Assert(motions_ && stabilizationMotions_);
 
@@ -141,12 +141,12 @@ void MoreAccurateMotionWobbleSuppressorGpu::suppress(int idx, const gpu::GpuMat
     Mat ML = S1 * getMotion(k1, idx, *motions2_) * getMotion(k1, idx, *motions_).inv() * S1.inv();
     Mat MR = S1 * getMotion(idx, k2, *motions2_).inv() * getMotion(idx, k2, *motions_) * S1.inv();
 
-    gpu::calcWobbleSuppressionMaps(k1, idx, k2, frame.size(), ML, MR, mapx_, mapy_);
+    cuda::calcWobbleSuppressionMaps(k1, idx, k2, frame.size(), ML, MR, mapx_, mapy_);
 
     if (result.data == frame.data)
-        result = gpu::GpuMat(frame.size(), frame.type());
+        result = cuda::GpuMat(frame.size(), frame.type());
 
-    gpu::remap(frame, result, mapx_, mapy_, INTER_LINEAR, BORDER_REPLICATE);
+    cuda::remap(frame, result, mapx_, mapy_, INTER_LINEAR, BORDER_REPLICATE);
 }
 
 
diff --git a/samples/cpp/stitching_detailed.cpp b/samples/cpp/stitching_detailed.cpp
index ecf19b494b..c9668198f5 100644
--- a/samples/cpp/stitching_detailed.cpp
+++ b/samples/cpp/stitching_detailed.cpp
@@ -357,7 +357,7 @@ int main(int argc, char* argv[])
     if (features_type == "surf")
     {
 #ifdef HAVE_OPENCV_NONFREE
-        if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0)
+        if (try_gpu && cuda::getCudaEnabledDeviceCount() > 0)
             finder = new SurfFeaturesFinderGpu();
         else
 #endif
@@ -553,7 +553,7 @@ int main(int argc, char* argv[])
 
     Ptr<WarperCreator> warper_creator;
 #ifdef HAVE_OPENCV_GPUWARPING
-    if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0)
+    if (try_gpu && cuda::getCudaEnabledDeviceCount() > 0)
     {
         if (warp_type == "plane") warper_creator = new cv::PlaneWarperGpu();
         else if (warp_type == "cylindrical") warper_creator = new cv::CylindricalWarperGpu();
@@ -618,7 +618,7 @@ int main(int argc, char* argv[])
     else if (seam_find_type == "gc_color")
     {
 #ifdef HAVE_OPENCV_GPU
-        if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0)
+        if (try_gpu && cuda::getCudaEnabledDeviceCount() > 0)
             seam_finder = new detail::GraphCutSeamFinderGpu(GraphCutSeamFinderBase::COST_COLOR);
         else
 #endif
@@ -627,7 +627,7 @@ int main(int argc, char* argv[])
     else if (seam_find_type == "gc_colorgrad")
     {
 #ifdef HAVE_OPENCV_GPU
-        if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0)
+        if (try_gpu && cuda::getCudaEnabledDeviceCount() > 0)
             seam_finder = new detail::GraphCutSeamFinderGpu(GraphCutSeamFinderBase::COST_COLOR_GRAD);
         else
 #endif
diff --git a/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp b/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
index e8e961031a..00223b0dbd 100644
--- a/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
+++ b/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
@@ -21,28 +21,28 @@ Scalar getMSSIM_GPU( const Mat& I1, const Mat& I2);
 
 struct BufferPSNR                                     // Optimized GPU versions
 {   // Data allocations are very expensive on GPU. Use a buffer to solve: allocate once reuse later.
-    gpu::GpuMat gI1, gI2, gs, t1,t2;
+    cuda::GpuMat gI1, gI2, gs, t1,t2;
 
-    gpu::GpuMat buf;
+    cuda::GpuMat buf;
 };
 double getPSNR_GPU_optimized(const Mat& I1, const Mat& I2, BufferPSNR& b);
 
 struct BufferMSSIM                                     // Optimized GPU versions
 {   // Data allocations are very expensive on GPU. Use a buffer to solve: allocate once reuse later.
-    gpu::GpuMat gI1, gI2, gs, t1,t2;
+    cuda::GpuMat gI1, gI2, gs, t1,t2;
 
-    gpu::GpuMat I1_2, I2_2, I1_I2;
-    vector<gpu::GpuMat> vI1, vI2;
+    cuda::GpuMat I1_2, I2_2, I1_I2;
+    vector<cuda::GpuMat> vI1, vI2;
 
-    gpu::GpuMat mu1, mu2;
-    gpu::GpuMat mu1_2, mu2_2, mu1_mu2;
+    cuda::GpuMat mu1, mu2;
+    cuda::GpuMat mu1_2, mu2_2, mu1_mu2;
 
-    gpu::GpuMat sigma1_2, sigma2_2, sigma12;
-    gpu::GpuMat t3;
+    cuda::GpuMat sigma1_2, sigma2_2, sigma12;
+    cuda::GpuMat t3;
 
-    gpu::GpuMat ssim_map;
+    cuda::GpuMat ssim_map;
 
-    gpu::GpuMat buf;
+    cuda::GpuMat buf;
 };
 Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b);
 
@@ -197,10 +197,10 @@ double getPSNR_GPU_optimized(const Mat& I1, const Mat& I2, BufferPSNR& b)
     b.gI1.convertTo(b.t1, CV_32F);
     b.gI2.convertTo(b.t2, CV_32F);
 
-    gpu::absdiff(b.t1.reshape(1), b.t2.reshape(1), b.gs);
-    gpu::multiply(b.gs, b.gs, b.gs);
+    cuda::absdiff(b.t1.reshape(1), b.t2.reshape(1), b.gs);
+    cuda::multiply(b.gs, b.gs, b.gs);
 
-    double sse = gpu::sum(b.gs, b.buf)[0];
+    double sse = cuda::sum(b.gs, b.buf)[0];
 
     if( sse <= 1e-10) // for small values return zero
         return 0;
@@ -214,7 +214,7 @@ double getPSNR_GPU_optimized(const Mat& I1, const Mat& I2, BufferPSNR& b)
 
 double getPSNR_GPU(const Mat& I1, const Mat& I2)
 {
-    gpu::GpuMat gI1, gI2, gs, t1,t2;
+    cuda::GpuMat gI1, gI2, gs, t1,t2;
 
     gI1.upload(I1);
     gI2.upload(I2);
@@ -222,10 +222,10 @@ double getPSNR_GPU(const Mat& I1, const Mat& I2)
     gI1.convertTo(t1, CV_32F);
     gI2.convertTo(t2, CV_32F);
 
-    gpu::absdiff(t1.reshape(1), t2.reshape(1), gs);
-    gpu::multiply(gs, gs, gs);
+    cuda::absdiff(t1.reshape(1), t2.reshape(1), gs);
+    cuda::multiply(gs, gs, gs);
 
-    Scalar s = gpu::sum(gs);
+    Scalar s = cuda::sum(gs);
     double sse = s.val[0] + s.val[1] + s.val[2];
 
     if( sse <= 1e-10) // for small values return zero
@@ -295,7 +295,7 @@ Scalar getMSSIM_GPU( const Mat& i1, const Mat& i2)
 {
     const float C1 = 6.5025f, C2 = 58.5225f;
     /***************************** INITS **********************************/
-    gpu::GpuMat gI1, gI2, gs1, tmp1,tmp2;
+    cuda::GpuMat gI1, gI2, gs1, tmp1,tmp2;
 
     gI1.upload(i1);
     gI2.upload(i2);
@@ -303,57 +303,57 @@ Scalar getMSSIM_GPU( const Mat& i1, const Mat& i2)
     gI1.convertTo(tmp1, CV_MAKE_TYPE(CV_32F, gI1.channels()));
     gI2.convertTo(tmp2, CV_MAKE_TYPE(CV_32F, gI2.channels()));
 
-    vector<gpu::GpuMat> vI1, vI2;
-    gpu::split(tmp1, vI1);
-    gpu::split(tmp2, vI2);
+    vector<cuda::GpuMat> vI1, vI2;
+    cuda::split(tmp1, vI1);
+    cuda::split(tmp2, vI2);
     Scalar mssim;
 
-    Ptr<gpu::Filter> gauss = gpu::createGaussianFilter(vI2[0].type(), -1, Size(11, 11), 1.5);
+    Ptr<cuda::Filter> gauss = cuda::createGaussianFilter(vI2[0].type(), -1, Size(11, 11), 1.5);
 
     for( int i = 0; i < gI1.channels(); ++i )
     {
-        gpu::GpuMat I2_2, I1_2, I1_I2;
+        cuda::GpuMat I2_2, I1_2, I1_I2;
 
-        gpu::multiply(vI2[i], vI2[i], I2_2);        // I2^2
-        gpu::multiply(vI1[i], vI1[i], I1_2);        // I1^2
-        gpu::multiply(vI1[i], vI2[i], I1_I2);       // I1 * I2
+        cuda::multiply(vI2[i], vI2[i], I2_2);        // I2^2
+        cuda::multiply(vI1[i], vI1[i], I1_2);        // I1^2
+        cuda::multiply(vI1[i], vI2[i], I1_I2);       // I1 * I2
 
         /*************************** END INITS **********************************/
-        gpu::GpuMat mu1, mu2;   // PRELIMINARY COMPUTING
+        cuda::GpuMat mu1, mu2;   // PRELIMINARY COMPUTING
         gauss->apply(vI1[i], mu1);
         gauss->apply(vI2[i], mu2);
 
-        gpu::GpuMat mu1_2, mu2_2, mu1_mu2;
-        gpu::multiply(mu1, mu1, mu1_2);
-        gpu::multiply(mu2, mu2, mu2_2);
-        gpu::multiply(mu1, mu2, mu1_mu2);
+        cuda::GpuMat mu1_2, mu2_2, mu1_mu2;
+        cuda::multiply(mu1, mu1, mu1_2);
+        cuda::multiply(mu2, mu2, mu2_2);
+        cuda::multiply(mu1, mu2, mu1_mu2);
 
-        gpu::GpuMat sigma1_2, sigma2_2, sigma12;
+        cuda::GpuMat sigma1_2, sigma2_2, sigma12;
 
         gauss->apply(I1_2, sigma1_2);
-        gpu::subtract(sigma1_2, mu1_2, sigma1_2); // sigma1_2 -= mu1_2;
+        cuda::subtract(sigma1_2, mu1_2, sigma1_2); // sigma1_2 -= mu1_2;
 
         gauss->apply(I2_2, sigma2_2);
-        gpu::subtract(sigma2_2, mu2_2, sigma2_2); // sigma2_2 -= mu2_2;
+        cuda::subtract(sigma2_2, mu2_2, sigma2_2); // sigma2_2 -= mu2_2;
 
         gauss->apply(I1_I2, sigma12);
-        gpu::subtract(sigma12, mu1_mu2, sigma12); // sigma12 -= mu1_mu2;
+        cuda::subtract(sigma12, mu1_mu2, sigma12); // sigma12 -= mu1_mu2;
 
         ///////////////////////////////// FORMULA ////////////////////////////////
-        gpu::GpuMat t1, t2, t3;
+        cuda::GpuMat t1, t2, t3;
 
         mu1_mu2.convertTo(t1, -1, 2, C1); // t1 = 2 * mu1_mu2 + C1;
         sigma12.convertTo(t2, -1, 2, C2); // t2 = 2 * sigma12 + C2;
-        gpu::multiply(t1, t2, t3);        // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))
+        cuda::multiply(t1, t2, t3);        // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))
 
-        gpu::addWeighted(mu1_2, 1.0, mu2_2, 1.0, C1, t1);       // t1 = mu1_2 + mu2_2 + C1;
-        gpu::addWeighted(sigma1_2, 1.0, sigma2_2, 1.0, C2, t2); // t2 = sigma1_2 + sigma2_2 + C2;
-        gpu::multiply(t1, t2, t1);                              // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))
+        cuda::addWeighted(mu1_2, 1.0, mu2_2, 1.0, C1, t1);       // t1 = mu1_2 + mu2_2 + C1;
+        cuda::addWeighted(sigma1_2, 1.0, sigma2_2, 1.0, C2, t2); // t2 = sigma1_2 + sigma2_2 + C2;
+        cuda::multiply(t1, t2, t1);                              // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))
 
-        gpu::GpuMat ssim_map;
-        gpu::divide(t3, t1, ssim_map);      // ssim_map =  t3./t1;
+        cuda::GpuMat ssim_map;
+        cuda::divide(t3, t1, ssim_map);      // ssim_map =  t3./t1;
 
-        Scalar s = gpu::sum(ssim_map);
+        Scalar s = cuda::sum(ssim_map);
         mssim.val[i] = s.val[0] / (ssim_map.rows * ssim_map.cols);
 
     }
@@ -368,63 +368,63 @@ Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b)
     b.gI1.upload(i1);
     b.gI2.upload(i2);
 
-    gpu::Stream stream;
+    cuda::Stream stream;
 
     b.gI1.convertTo(b.t1, CV_32F, stream);
     b.gI2.convertTo(b.t2, CV_32F, stream);
 
-    gpu::split(b.t1, b.vI1, stream);
-    gpu::split(b.t2, b.vI2, stream);
+    cuda::split(b.t1, b.vI1, stream);
+    cuda::split(b.t2, b.vI2, stream);
     Scalar mssim;
 
-    Ptr<gpu::Filter> gauss = gpu::createGaussianFilter(b.vI1[0].type(), -1, Size(11, 11), 1.5);
+    Ptr<cuda::Filter> gauss = cuda::createGaussianFilter(b.vI1[0].type(), -1, Size(11, 11), 1.5);
 
     for( int i = 0; i < b.gI1.channels(); ++i )
     {
-        gpu::multiply(b.vI2[i], b.vI2[i], b.I2_2, 1, -1, stream);        // I2^2
-        gpu::multiply(b.vI1[i], b.vI1[i], b.I1_2, 1, -1, stream);        // I1^2
-        gpu::multiply(b.vI1[i], b.vI2[i], b.I1_I2, 1, -1, stream);       // I1 * I2
+        cuda::multiply(b.vI2[i], b.vI2[i], b.I2_2, 1, -1, stream);        // I2^2
+        cuda::multiply(b.vI1[i], b.vI1[i], b.I1_2, 1, -1, stream);        // I1^2
+        cuda::multiply(b.vI1[i], b.vI2[i], b.I1_I2, 1, -1, stream);       // I1 * I2
 
         gauss->apply(b.vI1[i], b.mu1, stream);
         gauss->apply(b.vI2[i], b.mu2, stream);
 
-        gpu::multiply(b.mu1, b.mu1, b.mu1_2, 1, -1, stream);
-        gpu::multiply(b.mu2, b.mu2, b.mu2_2, 1, -1, stream);
-        gpu::multiply(b.mu1, b.mu2, b.mu1_mu2, 1, -1, stream);
+        cuda::multiply(b.mu1, b.mu1, b.mu1_2, 1, -1, stream);
+        cuda::multiply(b.mu2, b.mu2, b.mu2_2, 1, -1, stream);
+        cuda::multiply(b.mu1, b.mu2, b.mu1_mu2, 1, -1, stream);
 
         gauss->apply(b.I1_2, b.sigma1_2, stream);
-        gpu::subtract(b.sigma1_2, b.mu1_2, b.sigma1_2, gpu::GpuMat(), -1, stream);
+        cuda::subtract(b.sigma1_2, b.mu1_2, b.sigma1_2, cuda::GpuMat(), -1, stream);
         //b.sigma1_2 -= b.mu1_2;  - This would result in an extra data transfer operation
 
         gauss->apply(b.I2_2, b.sigma2_2, stream);
-        gpu::subtract(b.sigma2_2, b.mu2_2, b.sigma2_2, gpu::GpuMat(), -1, stream);
+        cuda::subtract(b.sigma2_2, b.mu2_2, b.sigma2_2, cuda::GpuMat(), -1, stream);
         //b.sigma2_2 -= b.mu2_2;
 
         gauss->apply(b.I1_I2, b.sigma12, stream);
-        gpu::subtract(b.sigma12, b.mu1_mu2, b.sigma12, gpu::GpuMat(), -1, stream);
+        cuda::subtract(b.sigma12, b.mu1_mu2, b.sigma12, cuda::GpuMat(), -1, stream);
         //b.sigma12 -= b.mu1_mu2;
 
         //here too it would be an extra data transfer due to call of operator*(Scalar, Mat)
-        gpu::multiply(b.mu1_mu2, 2, b.t1, 1, -1, stream); //b.t1 = 2 * b.mu1_mu2 + C1;
-        gpu::add(b.t1, C1, b.t1, gpu::GpuMat(), -1, stream);
-        gpu::multiply(b.sigma12, 2, b.t2, 1, -1, stream); //b.t2 = 2 * b.sigma12 + C2;
-        gpu::add(b.t2, C2, b.t2, gpu::GpuMat(), -12, stream);
+        cuda::multiply(b.mu1_mu2, 2, b.t1, 1, -1, stream); //b.t1 = 2 * b.mu1_mu2 + C1;
+        cuda::add(b.t1, C1, b.t1, cuda::GpuMat(), -1, stream);
+        cuda::multiply(b.sigma12, 2, b.t2, 1, -1, stream); //b.t2 = 2 * b.sigma12 + C2;
+        cuda::add(b.t2, C2, b.t2, cuda::GpuMat(), -1, stream); // dtype -1, as in every other add() call in this function
 
-        gpu::multiply(b.t1, b.t2, b.t3, 1, -1, stream);     // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))
+        cuda::multiply(b.t1, b.t2, b.t3, 1, -1, stream);     // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))
 
-        gpu::add(b.mu1_2, b.mu2_2, b.t1, gpu::GpuMat(), -1, stream);
-        gpu::add(b.t1, C1, b.t1, gpu::GpuMat(), -1, stream);
+        cuda::add(b.mu1_2, b.mu2_2, b.t1, cuda::GpuMat(), -1, stream);
+        cuda::add(b.t1, C1, b.t1, cuda::GpuMat(), -1, stream);
 
-        gpu::add(b.sigma1_2, b.sigma2_2, b.t2, gpu::GpuMat(), -1, stream);
-        gpu::add(b.t2, C2, b.t2, gpu::GpuMat(), -1, stream);
+        cuda::add(b.sigma1_2, b.sigma2_2, b.t2, cuda::GpuMat(), -1, stream);
+        cuda::add(b.t2, C2, b.t2, cuda::GpuMat(), -1, stream);
 
 
-        gpu::multiply(b.t1, b.t2, b.t1, 1, -1, stream);     // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))
-        gpu::divide(b.t3, b.t1, b.ssim_map, 1, -1, stream);      // ssim_map =  t3./t1;
+        cuda::multiply(b.t1, b.t2, b.t1, 1, -1, stream);     // t1 =((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))
+        cuda::divide(b.t3, b.t1, b.ssim_map, 1, -1, stream);      // ssim_map =  t3./t1;
 
         stream.waitForCompletion();
 
-        Scalar s = gpu::sum(b.ssim_map, b.buf);
+        Scalar s = cuda::sum(b.ssim_map, b.buf);
         mssim.val[i] = s.val[0] / (b.ssim_map.rows * b.ssim_map.cols);
 
     }
diff --git a/samples/cpp/videostab.cpp b/samples/cpp/videostab.cpp
index 21606d4950..f8f21dd146 100644
--- a/samples/cpp/videostab.cpp
+++ b/samples/cpp/videostab.cpp
@@ -347,7 +347,7 @@ int main(int argc, const char **argv)
         {
             cout << "initializing GPU..."; cout.flush();
             Mat hostTmp = Mat::zeros(1, 1, CV_32F);
-            gpu::GpuMat deviceTmp;
+            cuda::GpuMat deviceTmp;
             deviceTmp.upload(hostTmp);
             cout << endl;
         }
diff --git a/samples/gpu/alpha_comp.cpp b/samples/gpu/alpha_comp.cpp
index ca6937e1c2..1193b11900 100644
--- a/samples/gpu/alpha_comp.cpp
+++ b/samples/gpu/alpha_comp.cpp
@@ -6,7 +6,7 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 int main()
 {
diff --git a/samples/gpu/bgfg_segm.cpp b/samples/gpu/bgfg_segm.cpp
index 1b8e53271c..28d190042d 100644
--- a/samples/gpu/bgfg_segm.cpp
+++ b/samples/gpu/bgfg_segm.cpp
@@ -14,7 +14,7 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 enum Method
 {
@@ -75,10 +75,10 @@ int main(int argc, const char** argv)
 
     GpuMat d_frame(frame);
 
-    Ptr<BackgroundSubtractor> mog = gpu::createBackgroundSubtractorMOG();
-    Ptr<BackgroundSubtractor> mog2 = gpu::createBackgroundSubtractorMOG2();
-    Ptr<BackgroundSubtractor> gmg = gpu::createBackgroundSubtractorGMG(40);
-    Ptr<BackgroundSubtractor> fgd = gpu::createBackgroundSubtractorFGD();
+    Ptr<BackgroundSubtractor> mog = cuda::createBackgroundSubtractorMOG();
+    Ptr<BackgroundSubtractor> mog2 = cuda::createBackgroundSubtractorMOG2();
+    Ptr<BackgroundSubtractor> gmg = cuda::createBackgroundSubtractorGMG(40);
+    Ptr<BackgroundSubtractor> fgd = cuda::createBackgroundSubtractorFGD();
 
     GpuMat d_fgmask;
     GpuMat d_fgimg;
diff --git a/samples/gpu/brox_optical_flow.cpp b/samples/gpu/brox_optical_flow.cpp
index 1fb85c9038..b5522d5cd9 100644
--- a/samples/gpu/brox_optical_flow.cpp
+++ b/samples/gpu/brox_optical_flow.cpp
@@ -10,7 +10,7 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 void getFlowField(const Mat& u, const Mat& v, Mat& flowField);
 
@@ -64,7 +64,7 @@ int main(int argc, const char* argv[])
             return -1;
         }
 
-        cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
+        cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
 
         cout << "OpenCV / NVIDIA Computer Vision" << endl;
         cout << "Optical Flow Demo: Frame Interpolation" << endl;
diff --git a/samples/gpu/cascadeclassifier.cpp b/samples/gpu/cascadeclassifier.cpp
index e27186c271..56b70a2eaa 100644
--- a/samples/gpu/cascadeclassifier.cpp
+++ b/samples/gpu/cascadeclassifier.cpp
@@ -14,7 +14,7 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 
 static void help()
@@ -51,7 +51,7 @@ static void convertAndResize(const GpuMat& src, GpuMat& gray, GpuMat& resized, d
 {
     if (src.channels() == 3)
     {
-        cv::gpu::cvtColor( src, gray, COLOR_BGR2GRAY );
+        cv::cuda::cvtColor( src, gray, COLOR_BGR2GRAY );
     }
     else
     {
@@ -62,7 +62,7 @@ static void convertAndResize(const GpuMat& src, GpuMat& gray, GpuMat& resized, d
 
     if (scale != 1)
     {
-        cv::gpu::resize(gray, resized, sz);
+        cv::cuda::resize(gray, resized, sz);
     }
     else
     {
@@ -131,7 +131,7 @@ int main(int argc, const char *argv[])
         return cerr << "No GPU found or the library is compiled without GPU support" << endl, -1;
     }
 
-    cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
+    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
 
     string cascadeName;
     string inputName;
diff --git a/samples/gpu/cascadeclassifier_nvidia_api.cpp b/samples/gpu/cascadeclassifier_nvidia_api.cpp
index 3b27de58c3..bd65271f93 100644
--- a/samples/gpu/cascadeclassifier_nvidia_api.cpp
+++ b/samples/gpu/cascadeclassifier_nvidia_api.cpp
@@ -162,10 +162,10 @@ int main(int argc, const char** argv)
     cout << "Syntax: exename <cascade_file> <image_or_video_or_cameraid>" << endl;
     cout << "=========================================" << endl;
 
-    ncvAssertPrintReturn(cv::gpu::getCudaEnabledDeviceCount() != 0, "No GPU found or the library is compiled without GPU support", -1);
+    ncvAssertPrintReturn(cv::cuda::getCudaEnabledDeviceCount() != 0, "No GPU found or the library is compiled without GPU support", -1);
     ncvAssertPrintReturn(argc == 3, "Invalid number of arguments", -1);
 
-    cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
+    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
 
     string cascadeName = argv[1];
     string inputName = argv[2];
diff --git a/samples/gpu/driver_api_multi.cpp b/samples/gpu/driver_api_multi.cpp
index e78f7e54fd..ee6de7765e 100644
--- a/samples/gpu/driver_api_multi.cpp
+++ b/samples/gpu/driver_api_multi.cpp
@@ -49,7 +49,7 @@ int main()
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 struct Worker { void operator()(int device_id) const; };
 void destroyContexts();
@@ -80,7 +80,7 @@ int main()
 
     for (int i = 0; i < num_devices; ++i)
     {
-        cv::gpu::printShortCudaDeviceInfo(i);
+        cv::cuda::printShortCudaDeviceInfo(i);
 
         DeviceInfo dev_info(i);
         if (!dev_info.isCompatible())
@@ -135,7 +135,7 @@ void Worker::operator()(int device_id) const
     // GPU works
     GpuMat d_src(src);
     GpuMat d_dst;
-    gpu::transpose(d_src, d_dst);
+    cuda::transpose(d_src, d_dst);
 
     // Check results
     bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
diff --git a/samples/gpu/driver_api_stereo_multi.cpp b/samples/gpu/driver_api_stereo_multi.cpp
index d40c20c1e9..0e5fe8a992 100644
--- a/samples/gpu/driver_api_stereo_multi.cpp
+++ b/samples/gpu/driver_api_stereo_multi.cpp
@@ -51,7 +51,7 @@ int main()
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 struct Worker { void operator()(int device_id) const; };
 void destroyContexts();
@@ -85,7 +85,7 @@ void inline contextOff()
 // GPUs data
 GpuMat d_left[2];
 GpuMat d_right[2];
-Ptr<gpu::StereoBM> bm[2];
+Ptr<cuda::StereoBM> bm[2];
 GpuMat d_result[2];
 
 static void printHelp()
@@ -110,7 +110,7 @@ int main(int argc, char** argv)
 
     for (int i = 0; i < num_devices; ++i)
     {
-        cv::gpu::printShortCudaDeviceInfo(i);
+        cv::cuda::printShortCudaDeviceInfo(i);
 
         DeviceInfo dev_info(i);
         if (!dev_info.isCompatible())
@@ -162,14 +162,14 @@ int main(int argc, char** argv)
     contextOn(0);
     d_left[0].upload(left.rowRange(0, left.rows / 2));
     d_right[0].upload(right.rowRange(0, right.rows / 2));
-    bm[0] = gpu::createStereoBM();
+    bm[0] = cuda::createStereoBM();
     contextOff();
 
     // Split source images for processing on the GPU #1
     contextOn(1);
     d_left[1].upload(left.rowRange(left.rows / 2, left.rows));
     d_right[1].upload(right.rowRange(right.rows / 2, right.rows));
-    bm[1] = gpu::createStereoBM();
+    bm[1] = cuda::createStereoBM();
     contextOff();
 
     // Execute calculation in two threads using two GPUs
diff --git a/samples/gpu/farneback_optical_flow.cpp b/samples/gpu/farneback_optical_flow.cpp
index c2a5d411e4..c47d7a2866 100644
--- a/samples/gpu/farneback_optical_flow.cpp
+++ b/samples/gpu/farneback_optical_flow.cpp
@@ -10,7 +10,7 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 template <typename T>
 inline T mapVal(T x, T a, T b, T c, T d)
diff --git a/samples/gpu/generalized_hough.cpp b/samples/gpu/generalized_hough.cpp
index 1863085256..ff91c7cc23 100644
--- a/samples/gpu/generalized_hough.cpp
+++ b/samples/gpu/generalized_hough.cpp
@@ -87,7 +87,7 @@ int main(int argc, const char* argv[])
 
     if (!full)
     {
-        Ptr<GeneralizedHoughBallard> ballard = useGpu ? gpu::createGeneralizedHoughBallard() : createGeneralizedHoughBallard();
+        Ptr<GeneralizedHoughBallard> ballard = useGpu ? cuda::createGeneralizedHoughBallard() : createGeneralizedHoughBallard();
 
         ballard->setMinDist(minDist);
         ballard->setLevels(levels);
@@ -99,7 +99,7 @@ int main(int argc, const char* argv[])
     }
     else
     {
-        Ptr<GeneralizedHoughGuil> guil = useGpu ? gpu::createGeneralizedHoughGuil() : createGeneralizedHoughGuil();
+        Ptr<GeneralizedHoughGuil> guil = useGpu ? cuda::createGeneralizedHoughGuil() : createGeneralizedHoughGuil();
 
         guil->setMinDist(minDist);
         guil->setLevels(levels);
@@ -126,9 +126,9 @@ int main(int argc, const char* argv[])
 
     if (useGpu)
     {
-        gpu::GpuMat d_templ(templ);
-        gpu::GpuMat d_image(image);
-        gpu::GpuMat d_position;
+        cuda::GpuMat d_templ(templ);
+        cuda::GpuMat d_image(image);
+        cuda::GpuMat d_position;
 
         alg->setTemplate(d_templ);
 
diff --git a/samples/gpu/hog.cpp b/samples/gpu/hog.cpp
index c90cc23335..52afd64592 100644
--- a/samples/gpu/hog.cpp
+++ b/samples/gpu/hog.cpp
@@ -194,7 +194,7 @@ Args Args::read(int argc, char** argv)
 
 App::App(const Args& s)
 {
-    cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
+    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
 
     args = s;
     cout << "\nControls:\n"
@@ -246,13 +246,13 @@ void App::run()
     // Create HOG descriptors and detectors here
     vector<float> detector;
     if (win_size == Size(64, 128))
-        detector = cv::gpu::HOGDescriptor::getPeopleDetector64x128();
+        detector = cv::cuda::HOGDescriptor::getPeopleDetector64x128();
     else
-        detector = cv::gpu::HOGDescriptor::getPeopleDetector48x96();
+        detector = cv::cuda::HOGDescriptor::getPeopleDetector48x96();
 
-    cv::gpu::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
-                                   cv::gpu::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
-                                   cv::gpu::HOGDescriptor::DEFAULT_NLEVELS);
+    cv::cuda::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
+                                   cv::cuda::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
+                                   cv::cuda::HOGDescriptor::DEFAULT_NLEVELS);
     cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
                               HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
     gpu_hog.setSVMDetector(detector);
@@ -289,7 +289,7 @@ void App::run()
         }
 
         Mat img_aux, img, img_to_show;
-        gpu::GpuMat gpu_img;
+        cuda::GpuMat gpu_img;
 
         // Iterate over all frames
         while (running && !frame.empty())
diff --git a/samples/gpu/houghlines.cpp b/samples/gpu/houghlines.cpp
index 14245e5fd9..f7c7e210ec 100644
--- a/samples/gpu/houghlines.cpp
+++ b/samples/gpu/houghlines.cpp
@@ -9,7 +9,7 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 static void help()
 {
@@ -59,7 +59,7 @@ int main(int argc, const char* argv[])
     {
         const int64 start = getTickCount();
 
-        Ptr<gpu::HoughSegmentDetector> hough = gpu::createHoughSegmentDetector(1.0f, (float) (CV_PI / 180.0f), 50, 5);
+        Ptr<cuda::HoughSegmentDetector> hough = cuda::createHoughSegmentDetector(1.0f, (float) (CV_PI / 180.0f), 50, 5);
 
         hough->detect(d_src, d_lines);
 
diff --git a/samples/gpu/morphology.cpp b/samples/gpu/morphology.cpp
index abc6c980b0..6cae765ae5 100644
--- a/samples/gpu/morphology.cpp
+++ b/samples/gpu/morphology.cpp
@@ -24,7 +24,7 @@ private:
     static void OpenCloseCallback(int, void*);
     static void ErodeDilateCallback(int, void*);
 
-    gpu::GpuMat src, dst;
+    cuda::GpuMat src, dst;
 
     int element_shape;
 
@@ -57,14 +57,14 @@ App::App(int argc, const char* argv[])
     if (src.channels() == 3)
     {
         // gpu support only 4th channel images
-        gpu::GpuMat src4ch;
-        gpu::cvtColor(src, src4ch, COLOR_BGR2BGRA);
+        cuda::GpuMat src4ch;
+        cuda::cvtColor(src, src4ch, COLOR_BGR2BGRA);
         src = src4ch;
     }
 
     help();
 
-    gpu::printShortCudaDeviceInfo(gpu::getDevice());
+    cuda::printShortCudaDeviceInfo(cuda::getDevice());
 }
 
 int App::run()
@@ -132,12 +132,12 @@ void App::OpenClose()
 
     if (n < 0)
     {
-        Ptr<gpu::Filter> openFilter = gpu::createMorphologyFilter(MORPH_OPEN, src.type(), element);
+        Ptr<cuda::Filter> openFilter = cuda::createMorphologyFilter(MORPH_OPEN, src.type(), element);
         openFilter->apply(src, dst);
     }
     else
     {
-        Ptr<gpu::Filter> closeFilter = gpu::createMorphologyFilter(MORPH_CLOSE, src.type(), element);
+        Ptr<cuda::Filter> closeFilter = cuda::createMorphologyFilter(MORPH_CLOSE, src.type(), element);
         closeFilter->apply(src, dst);
     }
 
@@ -154,12 +154,12 @@ void App::ErodeDilate()
 
     if (n < 0)
     {
-        Ptr<gpu::Filter> erodeFilter = gpu::createMorphologyFilter(MORPH_ERODE, src.type(), element);
+        Ptr<cuda::Filter> erodeFilter = cuda::createMorphologyFilter(MORPH_ERODE, src.type(), element);
         erodeFilter->apply(src, dst);
     }
     else
     {
-        Ptr<gpu::Filter> dilateFilter = gpu::createMorphologyFilter(MORPH_DILATE, src.type(), element);
+        Ptr<cuda::Filter> dilateFilter = cuda::createMorphologyFilter(MORPH_DILATE, src.type(), element);
         dilateFilter->apply(src, dst);
     }
 
diff --git a/samples/gpu/multi.cpp b/samples/gpu/multi.cpp
index b83fd2ce46..f6c9e01560 100644
--- a/samples/gpu/multi.cpp
+++ b/samples/gpu/multi.cpp
@@ -42,7 +42,7 @@ int main()
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 struct Worker { void operator()(int device_id) const; };
 
@@ -56,7 +56,7 @@ int main()
     }
     for (int i = 0; i < num_devices; ++i)
     {
-        cv::gpu::printShortCudaDeviceInfo(i);
+        cv::cuda::printShortCudaDeviceInfo(i);
 
         DeviceInfo dev_info(i);
         if (!dev_info.isCompatible())
@@ -92,7 +92,7 @@ void Worker::operator()(int device_id) const
     // GPU works
     GpuMat d_src(src);
     GpuMat d_dst;
-    gpu::transpose(d_src, d_dst);
+    cuda::transpose(d_src, d_dst);
 
     // Check results
     bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
diff --git a/samples/gpu/opengl.cpp b/samples/gpu/opengl.cpp
index 37960d9a9f..e2c83c6926 100644
--- a/samples/gpu/opengl.cpp
+++ b/samples/gpu/opengl.cpp
@@ -30,7 +30,7 @@ int main()
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 const int win_width = 800;
 const int win_height = 640;
diff --git a/samples/gpu/optical_flow.cpp b/samples/gpu/optical_flow.cpp
index cae00fcc35..d0b736a124 100644
--- a/samples/gpu/optical_flow.cpp
+++ b/samples/gpu/optical_flow.cpp
@@ -8,7 +8,7 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 inline bool isFlowCorrect(Point2f u)
 {
diff --git a/samples/gpu/opticalflow_nvidia_api.cpp b/samples/gpu/opticalflow_nvidia_api.cpp
index 4e0863b46a..91939fd22a 100644
--- a/samples/gpu/opticalflow_nvidia_api.cpp
+++ b/samples/gpu/opticalflow_nvidia_api.cpp
@@ -393,7 +393,7 @@ int main(int argc, char **argv)
         return result;
     }
 
-    cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
+    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
 
     std::cout << "OpenCV / NVIDIA Computer Vision\n";
     std::cout << "Optical Flow Demo: Frame Interpolation\n";
diff --git a/samples/gpu/performance/performance.cpp b/samples/gpu/performance/performance.cpp
index 8af0b3d0d4..66232be8b1 100644
--- a/samples/gpu/performance/performance.cpp
+++ b/samples/gpu/performance/performance.cpp
@@ -5,7 +5,7 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 void TestSystem::run()
 {
diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp
index 136a4d52a9..47569a2098 100644
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -23,9 +23,9 @@ TEST(matchTemplate)
     Mat src, templ, dst;
     gen(src, 3000, 3000, CV_32F, 0, 1);
 
-    gpu::GpuMat d_src(src), d_templ, d_dst;
+    cuda::GpuMat d_src(src), d_templ, d_dst;
 
-    Ptr<gpu::TemplateMatching> alg = gpu::createTemplateMatching(src.type(), TM_CCORR);
+    Ptr<cuda::TemplateMatching> alg = cuda::createTemplateMatching(src.type(), TM_CCORR);
 
     for (int templ_size = 5; templ_size < 200; templ_size *= 5)
     {
@@ -51,7 +51,7 @@ TEST(matchTemplate)
 TEST(minMaxLoc)
 {
     Mat src;
-    gpu::GpuMat d_src;
+    cuda::GpuMat d_src;
 
     double min_val, max_val;
     Point min_loc, max_loc;
@@ -69,7 +69,7 @@ TEST(minMaxLoc)
         d_src.upload(src);
 
         GPU_ON;
-        gpu::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
+        cuda::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
         GPU_OFF;
     }
 }
@@ -78,7 +78,7 @@ TEST(minMaxLoc)
 TEST(remap)
 {
     Mat src, dst, xmap, ymap;
-    gpu::GpuMat d_src, d_dst, d_xmap, d_ymap;
+    cuda::GpuMat d_src, d_dst, d_xmap, d_ymap;
 
     int interpolation = INTER_LINEAR;
     int borderMode = BORDER_REPLICATE;
@@ -112,10 +112,10 @@ TEST(remap)
         d_xmap.upload(xmap);
         d_ymap.upload(ymap);
 
-        gpu::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
+        cuda::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
 
         GPU_ON;
-        gpu::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
+        cuda::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
         GPU_OFF;
     }
 }
@@ -124,7 +124,7 @@ TEST(remap)
 TEST(dft)
 {
     Mat src, dst;
-    gpu::GpuMat d_src, d_dst;
+    cuda::GpuMat d_src, d_dst;
 
     for (int size = 1000; size <= 4000; size *= 2)
     {
@@ -140,10 +140,10 @@ TEST(dft)
 
         d_src.upload(src);
 
-        gpu::dft(d_src, d_dst, Size(size, size));
+        cuda::dft(d_src, d_dst, Size(size, size));
 
         GPU_ON;
-        gpu::dft(d_src, d_dst, Size(size, size));
+        cuda::dft(d_src, d_dst, Size(size, size));
         GPU_OFF;
     }
 }
@@ -152,7 +152,7 @@ TEST(dft)
 TEST(cornerHarris)
 {
     Mat src, dst;
-    gpu::GpuMat d_src, d_dst;
+    cuda::GpuMat d_src, d_dst;
 
     for (int size = 1000; size <= 4000; size *= 2)
     {
@@ -168,7 +168,7 @@ TEST(cornerHarris)
 
         d_src.upload(src);
 
-        Ptr<gpu::CornernessCriteria> harris = gpu::createHarrisCorner(src.type(), 5, 7, 0.1, BORDER_REFLECT101);
+        Ptr<cuda::CornernessCriteria> harris = cuda::createHarrisCorner(src.type(), 5, 7, 0.1, BORDER_REFLECT101);
 
         harris->compute(d_src, d_dst);
 
@@ -182,7 +182,7 @@ TEST(cornerHarris)
 TEST(integral)
 {
     Mat src, sum;
-    gpu::GpuMat d_src, d_sum, d_buf;
+    cuda::GpuMat d_src, d_sum, d_buf;
 
     for (int size = 1000; size <= 4000; size *= 2)
     {
@@ -198,10 +198,10 @@ TEST(integral)
 
         d_src.upload(src);
 
-        gpu::integralBuffered(d_src, d_sum, d_buf);
+        cuda::integralBuffered(d_src, d_sum, d_buf);
 
         GPU_ON;
-        gpu::integralBuffered(d_src, d_sum, d_buf);
+        cuda::integralBuffered(d_src, d_sum, d_buf);
         GPU_OFF;
     }
 }
@@ -210,7 +210,7 @@ TEST(integral)
 TEST(norm)
 {
     Mat src;
-    gpu::GpuMat d_src, d_buf;
+    cuda::GpuMat d_src, d_buf;
 
     for (int size = 2000; size <= 4000; size += 1000)
     {
@@ -226,10 +226,10 @@ TEST(norm)
 
         d_src.upload(src);
 
-        gpu::norm(d_src, NORM_INF, d_buf);
+        cuda::norm(d_src, NORM_INF, d_buf);
 
         GPU_ON;
-        gpu::norm(d_src, NORM_INF, d_buf);
+        cuda::norm(d_src, NORM_INF, d_buf);
         GPU_OFF;
     }
 }
@@ -240,7 +240,7 @@ TEST(meanShift)
     int sp = 10, sr = 10;
 
     Mat src, dst;
-    gpu::GpuMat d_src, d_dst;
+    cuda::GpuMat d_src, d_dst;
 
     for (int size = 400; size <= 800; size *= 2)
     {
@@ -258,10 +258,10 @@ TEST(meanShift)
 
         d_src.upload(src);
 
-        gpu::meanShiftFiltering(d_src, d_dst, sp, sr);
+        cuda::meanShiftFiltering(d_src, d_dst, sp, sr);
 
         GPU_ON;
-        gpu::meanShiftFiltering(d_src, d_dst, sp, sr);
+        cuda::meanShiftFiltering(d_src, d_dst, sp, sr);
         GPU_OFF;
     }
 }
@@ -283,15 +283,15 @@ TEST(SURF)
     surf(src, Mat(), keypoints, descriptors);
     CPU_OFF;
 
-    gpu::SURF_GPU d_surf;
-    gpu::GpuMat d_src(src);
-    gpu::GpuMat d_keypoints;
-    gpu::GpuMat d_descriptors;
+    cuda::SURF_GPU d_surf;
+    cuda::GpuMat d_src(src);
+    cuda::GpuMat d_keypoints;
+    cuda::GpuMat d_descriptors;
 
-    d_surf(d_src, gpu::GpuMat(), d_keypoints, d_descriptors);
+    d_surf(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
 
     GPU_ON;
-    d_surf(d_src, gpu::GpuMat(), d_keypoints, d_descriptors);
+    d_surf(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
     GPU_OFF;
 }
 
@@ -311,14 +311,14 @@ TEST(FAST)
     FAST(src, keypoints, 20);
     CPU_OFF;
 
-    gpu::FAST_GPU d_FAST(20);
-    gpu::GpuMat d_src(src);
-    gpu::GpuMat d_keypoints;
+    cuda::FAST_GPU d_FAST(20);
+    cuda::GpuMat d_src(src);
+    cuda::GpuMat d_keypoints;
 
-    d_FAST(d_src, gpu::GpuMat(), d_keypoints);
+    d_FAST(d_src, cuda::GpuMat(), d_keypoints);
 
     GPU_ON;
-    d_FAST(d_src, gpu::GpuMat(), d_keypoints);
+    d_FAST(d_src, cuda::GpuMat(), d_keypoints);
     GPU_OFF;
 }
 
@@ -338,15 +338,15 @@ TEST(ORB)
     orb(src, Mat(), keypoints, descriptors);
     CPU_OFF;
 
-    gpu::ORB_GPU d_orb;
-    gpu::GpuMat d_src(src);
-    gpu::GpuMat d_keypoints;
-    gpu::GpuMat d_descriptors;
+    cuda::ORB_GPU d_orb;
+    cuda::GpuMat d_src(src);
+    cuda::GpuMat d_keypoints;
+    cuda::GpuMat d_descriptors;
 
-    d_orb(d_src, gpu::GpuMat(), d_keypoints, d_descriptors);
+    d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
 
     GPU_ON;
-    d_orb(d_src, gpu::GpuMat(), d_keypoints, d_descriptors);
+    d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
     GPU_OFF;
 }
 
@@ -367,14 +367,14 @@ TEST(BruteForceMatcher)
 
     // Init GPU matcher
 
-    gpu::BFMatcher_GPU d_matcher(NORM_L2);
+    cuda::BFMatcher_GPU d_matcher(NORM_L2);
 
-    gpu::GpuMat d_query(query);
-    gpu::GpuMat d_train(train);
+    cuda::GpuMat d_query(query);
+    cuda::GpuMat d_train(train);
 
     // Output
     vector< vector<DMatch> > matches(2);
-    gpu::GpuMat d_trainIdx, d_distance, d_allDist, d_nMatches;
+    cuda::GpuMat d_trainIdx, d_distance, d_allDist, d_nMatches;
 
     SUBTEST << "match";
 
@@ -427,7 +427,7 @@ TEST(BruteForceMatcher)
 TEST(magnitude)
 {
     Mat x, y, mag;
-    gpu::GpuMat d_x, d_y, d_mag;
+    cuda::GpuMat d_x, d_y, d_mag;
 
     for (int size = 2000; size <= 4000; size += 1000)
     {
@@ -445,10 +445,10 @@ TEST(magnitude)
         d_x.upload(x);
         d_y.upload(y);
 
-        gpu::magnitude(d_x, d_y, d_mag);
+        cuda::magnitude(d_x, d_y, d_mag);
 
         GPU_ON;
-        gpu::magnitude(d_x, d_y, d_mag);
+        cuda::magnitude(d_x, d_y, d_mag);
         GPU_OFF;
     }
 }
@@ -457,7 +457,7 @@ TEST(magnitude)
 TEST(add)
 {
     Mat src1, src2, dst;
-    gpu::GpuMat d_src1, d_src2, d_dst;
+    cuda::GpuMat d_src1, d_src2, d_dst;
 
     for (int size = 2000; size <= 4000; size += 1000)
     {
@@ -475,10 +475,10 @@ TEST(add)
         d_src1.upload(src1);
         d_src2.upload(src2);
 
-        gpu::add(d_src1, d_src2, d_dst);
+        cuda::add(d_src1, d_src2, d_dst);
 
         GPU_ON;
-        gpu::add(d_src1, d_src2, d_dst);
+        cuda::add(d_src1, d_src2, d_dst);
         GPU_OFF;
     }
 }
@@ -487,7 +487,7 @@ TEST(add)
 TEST(log)
 {
     Mat src, dst;
-    gpu::GpuMat d_src, d_dst;
+    cuda::GpuMat d_src, d_dst;
 
     for (int size = 2000; size <= 4000; size += 1000)
     {
@@ -503,10 +503,10 @@ TEST(log)
 
         d_src.upload(src);
 
-        gpu::log(d_src, d_dst);
+        cuda::log(d_src, d_dst);
 
         GPU_ON;
-        gpu::log(d_src, d_dst);
+        cuda::log(d_src, d_dst);
         GPU_OFF;
     }
 }
@@ -515,7 +515,7 @@ TEST(log)
 TEST(mulSpectrums)
 {
     Mat src1, src2, dst;
-    gpu::GpuMat d_src1, d_src2, d_dst;
+    cuda::GpuMat d_src1, d_src2, d_dst;
 
     for (int size = 2000; size <= 4000; size += 1000)
     {
@@ -533,10 +533,10 @@ TEST(mulSpectrums)
         d_src1.upload(src1);
         d_src2.upload(src2);
 
-        gpu::mulSpectrums(d_src1, d_src2, d_dst, 0, true);
+        cuda::mulSpectrums(d_src1, d_src2, d_dst, 0, true);
 
         GPU_ON;
-        gpu::mulSpectrums(d_src1, d_src2, d_dst, 0, true);
+        cuda::mulSpectrums(d_src1, d_src2, d_dst, 0, true);
         GPU_OFF;
     }
 }
@@ -545,7 +545,7 @@ TEST(mulSpectrums)
 TEST(resize)
 {
     Mat src, dst;
-    gpu::GpuMat d_src, d_dst;
+    cuda::GpuMat d_src, d_dst;
 
     for (int size = 1000; size <= 3000; size += 1000)
     {
@@ -561,10 +561,10 @@ TEST(resize)
 
         d_src.upload(src);
 
-        gpu::resize(d_src, d_dst, Size(), 2.0, 2.0);
+        cuda::resize(d_src, d_dst, Size(), 2.0, 2.0);
 
         GPU_ON;
-        gpu::resize(d_src, d_dst, Size(), 2.0, 2.0);
+        cuda::resize(d_src, d_dst, Size(), 2.0, 2.0);
         GPU_OFF;
     }
 
@@ -582,10 +582,10 @@ TEST(resize)
 
         d_src.upload(src);
 
-        gpu::resize(d_src, d_dst, Size(), 0.5, 0.5);
+        cuda::resize(d_src, d_dst, Size(), 0.5, 0.5);
 
         GPU_ON;
-        gpu::resize(d_src, d_dst, Size(), 0.5, 0.5);
+        cuda::resize(d_src, d_dst, Size(), 0.5, 0.5);
         GPU_OFF;
     }
 }
@@ -594,7 +594,7 @@ TEST(resize)
 TEST(cvtColor)
 {
     Mat src, dst;
-    gpu::GpuMat d_src, d_dst;
+    cuda::GpuMat d_src, d_dst;
 
     gen(src, 4000, 4000, CV_8UC1, 0, 255);
     d_src.upload(src);
@@ -607,10 +607,10 @@ TEST(cvtColor)
     cvtColor(src, dst, COLOR_GRAY2BGRA, 4);
     CPU_OFF;
 
-    gpu::cvtColor(d_src, d_dst, COLOR_GRAY2BGRA, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_GRAY2BGRA, 4);
 
     GPU_ON;
-    gpu::cvtColor(d_src, d_dst, COLOR_GRAY2BGRA, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_GRAY2BGRA, 4);
     GPU_OFF;
 
     cv::swap(src, dst);
@@ -624,10 +624,10 @@ TEST(cvtColor)
     cvtColor(src, dst, COLOR_BGR2YCrCb);
     CPU_OFF;
 
-    gpu::cvtColor(d_src, d_dst, COLOR_BGR2YCrCb, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_BGR2YCrCb, 4);
 
     GPU_ON;
-    gpu::cvtColor(d_src, d_dst, COLOR_BGR2YCrCb, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_BGR2YCrCb, 4);
     GPU_OFF;
 
     cv::swap(src, dst);
@@ -641,10 +641,10 @@ TEST(cvtColor)
     cvtColor(src, dst, COLOR_YCrCb2BGR, 4);
     CPU_OFF;
 
-    gpu::cvtColor(d_src, d_dst, COLOR_YCrCb2BGR, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_YCrCb2BGR, 4);
 
     GPU_ON;
-    gpu::cvtColor(d_src, d_dst, COLOR_YCrCb2BGR, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_YCrCb2BGR, 4);
     GPU_OFF;
 
     cv::swap(src, dst);
@@ -658,10 +658,10 @@ TEST(cvtColor)
     cvtColor(src, dst, COLOR_BGR2XYZ);
     CPU_OFF;
 
-    gpu::cvtColor(d_src, d_dst, COLOR_BGR2XYZ, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_BGR2XYZ, 4);
 
     GPU_ON;
-    gpu::cvtColor(d_src, d_dst, COLOR_BGR2XYZ, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_BGR2XYZ, 4);
     GPU_OFF;
 
     cv::swap(src, dst);
@@ -675,10 +675,10 @@ TEST(cvtColor)
     cvtColor(src, dst, COLOR_XYZ2BGR, 4);
     CPU_OFF;
 
-    gpu::cvtColor(d_src, d_dst, COLOR_XYZ2BGR, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_XYZ2BGR, 4);
 
     GPU_ON;
-    gpu::cvtColor(d_src, d_dst, COLOR_XYZ2BGR, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_XYZ2BGR, 4);
     GPU_OFF;
 
     cv::swap(src, dst);
@@ -692,10 +692,10 @@ TEST(cvtColor)
     cvtColor(src, dst, COLOR_BGR2HSV);
     CPU_OFF;
 
-    gpu::cvtColor(d_src, d_dst, COLOR_BGR2HSV, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_BGR2HSV, 4);
 
     GPU_ON;
-    gpu::cvtColor(d_src, d_dst, COLOR_BGR2HSV, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_BGR2HSV, 4);
     GPU_OFF;
 
     cv::swap(src, dst);
@@ -709,10 +709,10 @@ TEST(cvtColor)
     cvtColor(src, dst, COLOR_HSV2BGR, 4);
     CPU_OFF;
 
-    gpu::cvtColor(d_src, d_dst, COLOR_HSV2BGR, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_HSV2BGR, 4);
 
     GPU_ON;
-    gpu::cvtColor(d_src, d_dst, COLOR_HSV2BGR, 4);
+    cuda::cvtColor(d_src, d_dst, COLOR_HSV2BGR, 4);
     GPU_OFF;
 
     cv::swap(src, dst);
@@ -723,7 +723,7 @@ TEST(cvtColor)
 TEST(erode)
 {
     Mat src, dst, ker;
-    gpu::GpuMat d_src, d_buf, d_dst;
+    cuda::GpuMat d_src, d_buf, d_dst;
 
     for (int size = 2000; size <= 4000; size += 1000)
     {
@@ -740,7 +740,7 @@ TEST(erode)
 
         d_src.upload(src);
 
-        Ptr<gpu::Filter> erode = gpu::createMorphologyFilter(MORPH_ERODE, d_src.type(), ker);
+        Ptr<cuda::Filter> erode = cuda::createMorphologyFilter(MORPH_ERODE, d_src.type(), ker);
 
         erode->apply(d_src, d_dst);
 
@@ -753,7 +753,7 @@ TEST(erode)
 TEST(threshold)
 {
     Mat src, dst;
-    gpu::GpuMat d_src, d_dst;
+    cuda::GpuMat d_src, d_dst;
 
     for (int size = 2000; size <= 4000; size += 1000)
     {
@@ -769,10 +769,10 @@ TEST(threshold)
 
         d_src.upload(src);
 
-        gpu::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
+        cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
 
         GPU_ON;
-        gpu::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
+        cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
         GPU_OFF;
     }
 
@@ -790,10 +790,10 @@ TEST(threshold)
 
         d_src.upload(src);
 
-        gpu::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
+        cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
 
         GPU_ON;
-        gpu::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
+        cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
         GPU_OFF;
     }
 }
@@ -801,7 +801,7 @@ TEST(threshold)
 TEST(pow)
 {
     Mat src, dst;
-    gpu::GpuMat d_src, d_dst;
+    cuda::GpuMat d_src, d_dst;
 
     for (int size = 1000; size <= 4000; size += 1000)
     {
@@ -817,10 +817,10 @@ TEST(pow)
 
         d_src.upload(src);
 
-        gpu::pow(d_src, -2.0, d_dst);
+        cuda::pow(d_src, -2.0, d_dst);
 
         GPU_ON;
-        gpu::pow(d_src, -2.0, d_dst);
+        cuda::pow(d_src, -2.0, d_dst);
         GPU_OFF;
     }
 }
@@ -830,7 +830,7 @@ TEST(projectPoints)
 {
     Mat src;
     vector<Point2f> dst;
-    gpu::GpuMat d_src, d_dst;
+    cuda::GpuMat d_src, d_dst;
 
     Mat rvec; gen(rvec, 1, 3, CV_32F, 0, 1);
     Mat tvec; gen(tvec, 1, 3, CV_32F, 0, 1);
@@ -854,10 +854,10 @@ TEST(projectPoints)
 
         d_src.upload(src);
 
-        gpu::projectPoints(d_src, rvec, tvec, camera_mat, Mat(), d_dst);
+        cuda::projectPoints(d_src, rvec, tvec, camera_mat, Mat(), d_dst);
 
         GPU_ON;
-        gpu::projectPoints(d_src, rvec, tvec, camera_mat, Mat(), d_dst);
+        cuda::projectPoints(d_src, rvec, tvec, camera_mat, Mat(), d_dst);
         GPU_OFF;
     }
 }
@@ -868,7 +868,7 @@ static void InitSolvePnpRansac()
     Mat object; gen(object, 1, 4, CV_32FC3, Scalar::all(0), Scalar::all(100));
     Mat image; gen(image, 1, 4, CV_32FC2, Scalar::all(0), Scalar::all(100));
     Mat rvec, tvec;
-    gpu::solvePnPRansac(object, image, Mat::eye(3, 3, CV_32F), Mat(), rvec, tvec);
+    cuda::solvePnPRansac(object, image, Mat::eye(3, 3, CV_32F), Mat(), rvec, tvec);
 }
 
 
@@ -899,7 +899,7 @@ TEST(solvePnPRansac)
         CPU_OFF;
 
         GPU_ON;
-        gpu::solvePnPRansac(object, image, camera_mat, Mat::zeros(1, 8, CV_32F), rvec, tvec, false, num_iters,
+        cuda::solvePnPRansac(object, image, camera_mat, Mat::zeros(1, 8, CV_32F), rvec, tvec, false, num_iters,
                             max_dist, int(num_points * 0.05), &inliers_gpu);
         GPU_OFF;
     }
@@ -921,11 +921,11 @@ TEST(GaussianBlur)
         GaussianBlur(src, dst, Size(3, 3), 1);
         CPU_OFF;
 
-        gpu::GpuMat d_src(src);
-        gpu::GpuMat d_dst(src.size(), src.type());
-        gpu::GpuMat d_buf;
+        cuda::GpuMat d_src(src);
+        cuda::GpuMat d_dst(src.size(), src.type());
+        cuda::GpuMat d_buf;
 
-        cv::Ptr<cv::gpu::Filter> gauss = cv::gpu::createGaussianFilter(d_src.type(), -1, cv::Size(3, 3), 1);
+        cv::Ptr<cv::cuda::Filter> gauss = cv::cuda::createGaussianFilter(d_src.type(), -1, cv::Size(3, 3), 1);
 
         gauss->apply(d_src, d_dst);
 
@@ -956,10 +956,10 @@ TEST(filter2D)
             cv::filter2D(src, dst, -1, kernel);
             CPU_OFF;
 
-            gpu::GpuMat d_src(src);
-            gpu::GpuMat d_dst;
+            cuda::GpuMat d_src(src);
+            cuda::GpuMat d_dst;
 
-            Ptr<gpu::Filter> filter2D = gpu::createLinearFilter(d_src.type(), -1, kernel);
+            Ptr<cuda::Filter> filter2D = cuda::createLinearFilter(d_src.type(), -1, kernel);
             filter2D->apply(d_src, d_dst);
 
             GPU_ON;
@@ -984,13 +984,13 @@ TEST(pyrDown)
         pyrDown(src, dst);
         CPU_OFF;
 
-        gpu::GpuMat d_src(src);
-        gpu::GpuMat d_dst;
+        cuda::GpuMat d_src(src);
+        cuda::GpuMat d_dst;
 
-        gpu::pyrDown(d_src, d_dst);
+        cuda::pyrDown(d_src, d_dst);
 
         GPU_ON;
-        gpu::pyrDown(d_src, d_dst);
+        cuda::pyrDown(d_src, d_dst);
         GPU_OFF;
     }
 }
@@ -1011,13 +1011,13 @@ TEST(pyrUp)
         pyrUp(src, dst);
         CPU_OFF;
 
-        gpu::GpuMat d_src(src);
-        gpu::GpuMat d_dst;
+        cuda::GpuMat d_src(src);
+        cuda::GpuMat d_dst;
 
-        gpu::pyrUp(d_src, d_dst);
+        cuda::pyrUp(d_src, d_dst);
 
         GPU_ON;
-        gpu::pyrUp(d_src, d_dst);
+        cuda::pyrUp(d_src, d_dst);
         GPU_OFF;
     }
 }
@@ -1039,14 +1039,14 @@ TEST(equalizeHist)
         equalizeHist(src, dst);
         CPU_OFF;
 
-        gpu::GpuMat d_src(src);
-        gpu::GpuMat d_dst;
-        gpu::GpuMat d_buf;
+        cuda::GpuMat d_src(src);
+        cuda::GpuMat d_dst;
+        cuda::GpuMat d_buf;
 
-        gpu::equalizeHist(d_src, d_dst, d_buf);
+        cuda::equalizeHist(d_src, d_dst, d_buf);
 
         GPU_ON;
-        gpu::equalizeHist(d_src, d_dst, d_buf);
+        cuda::equalizeHist(d_src, d_dst, d_buf);
         GPU_OFF;
     }
 }
@@ -1064,10 +1064,10 @@ TEST(Canny)
     Canny(img, edges, 50.0, 100.0);
     CPU_OFF;
 
-    gpu::GpuMat d_img(img);
-    gpu::GpuMat d_edges;
+    cuda::GpuMat d_img(img);
+    cuda::GpuMat d_edges;
 
-    Ptr<gpu::CannyEdgeDetector> canny = gpu::createCannyEdgeDetector(50.0, 100.0);
+    Ptr<cuda::CannyEdgeDetector> canny = cuda::createCannyEdgeDetector(50.0, 100.0);
 
     canny->detect(d_img, d_edges);
 
@@ -1087,9 +1087,9 @@ TEST(reduce)
         Mat dst0;
         Mat dst1;
 
-        gpu::GpuMat d_src(src);
-        gpu::GpuMat d_dst0;
-        gpu::GpuMat d_dst1;
+        cuda::GpuMat d_src(src);
+        cuda::GpuMat d_dst0;
+        cuda::GpuMat d_dst1;
 
         SUBTEST << size << 'x' << size << ", dim = 0";
 
@@ -1099,10 +1099,10 @@ TEST(reduce)
         reduce(src, dst0, 0, REDUCE_MIN);
         CPU_OFF;
 
-        gpu::reduce(d_src, d_dst0, 0, REDUCE_MIN);
+        cuda::reduce(d_src, d_dst0, 0, REDUCE_MIN);
 
         GPU_ON;
-        gpu::reduce(d_src, d_dst0, 0, REDUCE_MIN);
+        cuda::reduce(d_src, d_dst0, 0, REDUCE_MIN);
         GPU_OFF;
 
         SUBTEST << size << 'x' << size << ", dim = 1";
@@ -1113,10 +1113,10 @@ TEST(reduce)
         reduce(src, dst1, 1, REDUCE_MIN);
         CPU_OFF;
 
-        gpu::reduce(d_src, d_dst1, 1, REDUCE_MIN);
+        cuda::reduce(d_src, d_dst1, 1, REDUCE_MIN);
 
         GPU_ON;
-        gpu::reduce(d_src, d_dst1, 1, REDUCE_MIN);
+        cuda::reduce(d_src, d_dst1, 1, REDUCE_MIN);
         GPU_OFF;
     }
 }
@@ -1125,7 +1125,7 @@ TEST(reduce)
 TEST(gemm)
 {
     Mat src1, src2, src3, dst;
-    gpu::GpuMat d_src1, d_src2, d_src3, d_dst;
+    cuda::GpuMat d_src1, d_src2, d_src3, d_dst;
 
     for (int size = 512; size <= 1024; size *= 2)
     {
@@ -1145,10 +1145,10 @@ TEST(gemm)
         d_src2.upload(src2);
         d_src3.upload(src3);
 
-        gpu::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
+        cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
 
         GPU_ON;
-        gpu::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
+        cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
         GPU_OFF;
     }
 }
@@ -1166,10 +1166,10 @@ TEST(GoodFeaturesToTrack)
     goodFeaturesToTrack(src, pts, 8000, 0.01, 0.0);
     CPU_OFF;
 
-    Ptr<gpu::CornersDetector> detector = gpu::createGoodFeaturesToTrackDetector(src.type(), 8000, 0.01, 0.0);
+    Ptr<cuda::CornersDetector> detector = cuda::createGoodFeaturesToTrackDetector(src.type(), 8000, 0.01, 0.0);
 
-    gpu::GpuMat d_src(src);
-    gpu::GpuMat d_pts;
+    cuda::GpuMat d_src(src);
+    cuda::GpuMat d_pts;
 
     detector->detect(d_src, d_pts);
 
@@ -1207,18 +1207,18 @@ TEST(PyrLKOpticalFlow)
         calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
         CPU_OFF;
 
-        gpu::PyrLKOpticalFlow d_pyrLK;
+        cuda::PyrLKOpticalFlow d_pyrLK;
 
-        gpu::GpuMat d_frame0(frame0);
-        gpu::GpuMat d_frame1(frame1);
+        cuda::GpuMat d_frame0(frame0);
+        cuda::GpuMat d_frame1(frame1);
 
-        gpu::GpuMat d_pts;
+        cuda::GpuMat d_pts;
         Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void*)&pts[0]);
         d_pts.upload(pts_mat);
 
-        gpu::GpuMat d_nextPts;
-        gpu::GpuMat d_status;
-        gpu::GpuMat d_err;
+        cuda::GpuMat d_nextPts;
+        cuda::GpuMat d_status;
+        cuda::GpuMat d_err;
 
         d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
 
@@ -1242,11 +1242,11 @@ TEST(FarnebackOpticalFlow)
     if (frame0.empty()) throw runtime_error("can't open " + datasets[i] + "1.png");
     if (frame1.empty()) throw runtime_error("can't open " + datasets[i] + "2.png");
 
-    gpu::FarnebackOpticalFlow calc;
+    cuda::FarnebackOpticalFlow calc;
     calc.fastPyramids = fastPyramids != 0;
     calc.flags |= useGaussianBlur ? OPTFLOW_FARNEBACK_GAUSSIAN : 0;
 
-    gpu::GpuMat d_frame0(frame0), d_frame1(frame1), d_flowx, d_flowy;
+    cuda::GpuMat d_frame0(frame0), d_frame1(frame1), d_flowx, d_flowy;
     GPU_ON;
     calc(d_frame0, d_frame1, d_flowx, d_flowy);
     GPU_OFF;
@@ -1297,8 +1297,8 @@ TEST(FGDStatModel)
 
     cap >> frame;
 
-    gpu::GpuMat d_frame(frame), d_fgmask;
-    Ptr<BackgroundSubtractor> d_fgd = gpu::createBackgroundSubtractorFGD();
+    cuda::GpuMat d_frame(frame), d_fgmask;
+    Ptr<BackgroundSubtractor> d_fgd = cuda::createBackgroundSubtractorFGD();
 
     d_fgd->apply(d_frame, d_fgmask);
 
@@ -1347,9 +1347,9 @@ TEST(MOG)
 
     cap >> frame;
 
-    cv::gpu::GpuMat d_frame(frame);
-    cv::Ptr<cv::BackgroundSubtractor> d_mog = cv::gpu::createBackgroundSubtractorMOG();
-    cv::gpu::GpuMat d_foreground;
+    cv::cuda::GpuMat d_frame(frame);
+    cv::Ptr<cv::BackgroundSubtractor> d_mog = cv::cuda::createBackgroundSubtractorMOG();
+    cv::cuda::GpuMat d_foreground;
 
     d_mog->apply(d_frame, d_foreground, 0.01);
 
@@ -1401,10 +1401,10 @@ TEST(MOG2)
 
     cap >> frame;
 
-    cv::Ptr<cv::BackgroundSubtractor> d_mog2 = cv::gpu::createBackgroundSubtractorMOG2();
-    cv::gpu::GpuMat d_frame(frame);
-    cv::gpu::GpuMat d_foreground;
-    cv::gpu::GpuMat d_background;
+    cv::Ptr<cv::BackgroundSubtractor> d_mog2 = cv::cuda::createBackgroundSubtractorMOG2();
+    cv::cuda::GpuMat d_frame(frame);
+    cv::cuda::GpuMat d_foreground;
+    cv::cuda::GpuMat d_background;
 
     d_mog2->apply(d_frame, d_foreground);
     d_mog2->getBackgroundImage(d_background);
diff --git a/samples/gpu/pyrlk_optical_flow.cpp b/samples/gpu/pyrlk_optical_flow.cpp
index 08717292cd..7fc44f042f 100644
--- a/samples/gpu/pyrlk_optical_flow.cpp
+++ b/samples/gpu/pyrlk_optical_flow.cpp
@@ -10,7 +10,7 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 static void download(const GpuMat& d_mat, vector<Point2f>& vec)
 {
@@ -179,7 +179,7 @@ int main(int argc, const char* argv[])
     GpuMat d_frame0Gray(frame0Gray);
     GpuMat d_prevPts;
 
-    Ptr<gpu::CornersDetector> detector = gpu::createGoodFeaturesToTrackDetector(d_frame0Gray.type(), points, 0.01, minDist);
+    Ptr<cuda::CornersDetector> detector = cuda::createGoodFeaturesToTrackDetector(d_frame0Gray.type(), points, 0.01, minDist);
 
     detector->detect(d_frame0Gray, d_prevPts);
 
diff --git a/samples/gpu/softcascade.cpp b/samples/gpu/softcascade.cpp
index 980d6ffdf7..1eeb2a036e 100644
--- a/samples/gpu/softcascade.cpp
+++ b/samples/gpu/softcascade.cpp
@@ -33,7 +33,7 @@ int main(int argc, char** argv)
         return 1;
     }
 
-    cv::gpu::setDevice(parser.get<int>("device"));
+    cv::cuda::setDevice(parser.get<int>("device"));
 
     std::string cascadePath = parser.get<std::string>("cascade");
 
@@ -67,8 +67,8 @@ int main(int argc, char** argv)
         return 1;
     }
 
-    cv::gpu::GpuMat objects(1, sizeof(Detection) * 10000, CV_8UC1);
-    cv::gpu::printShortCudaDeviceInfo(parser.get<int>("device"));
+    cv::cuda::GpuMat objects(1, sizeof(Detection) * 10000, CV_8UC1);
+    cv::cuda::printShortCudaDeviceInfo(parser.get<int>("device"));
     for (;;)
     {
         cv::Mat frame;
@@ -78,7 +78,7 @@ int main(int argc, char** argv)
             return 0;
         }
 
-        cv::gpu::GpuMat dframe(frame), roi(frame.rows, frame.cols, CV_8UC1);
+        cv::cuda::GpuMat dframe(frame), roi(frame.rows, frame.cols, CV_8UC1);
         roi.setTo(cv::Scalar::all(1));
         cascade.detect(dframe, roi, objects);
 
diff --git a/samples/gpu/stereo_match.cpp b/samples/gpu/stereo_match.cpp
index e404476406..1721ffbf60 100644
--- a/samples/gpu/stereo_match.cpp
+++ b/samples/gpu/stereo_match.cpp
@@ -63,11 +63,11 @@ private:
 
     Mat left_src, right_src;
     Mat left, right;
-    gpu::GpuMat d_left, d_right;
+    cuda::GpuMat d_left, d_right;
 
-    Ptr<gpu::StereoBM> bm;
-    Ptr<gpu::StereoBeliefPropagation> bp;
-    Ptr<gpu::StereoConstantSpaceBP> csbp;
+    Ptr<cuda::StereoBM> bm;
+    Ptr<cuda::StereoBeliefPropagation> bp;
+    Ptr<cuda::StereoConstantSpaceBP> csbp;
 
     int64 work_begin;
     double work_fps;
@@ -140,7 +140,7 @@ Params Params::read(int argc, char** argv)
 App::App(const Params& params)
     : p(params), running(false)
 {
-    cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
+    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
 
     cout << "stereo_match_gpu sample\n";
     cout << "\nControls:\n"
@@ -172,13 +172,13 @@ void App::run()
     imshow("right", right);
 
     // Set common parameters
-    bm = gpu::createStereoBM(p.ndisp);
-    bp = gpu::createStereoBeliefPropagation(p.ndisp);
-    csbp = cv::gpu::createStereoConstantSpaceBP(p.ndisp);
+    bm = cuda::createStereoBM(p.ndisp);
+    bp = cuda::createStereoBeliefPropagation(p.ndisp);
+    csbp = cv::cuda::createStereoConstantSpaceBP(p.ndisp);
 
     // Prepare disparity map of specified type
     Mat disp(left.size(), CV_8U);
-    gpu::GpuMat d_disp(left.size(), CV_8U);
+    cuda::GpuMat d_disp(left.size(), CV_8U);
 
     cout << endl;
     printParams();
diff --git a/samples/gpu/stereo_multi.cpp b/samples/gpu/stereo_multi.cpp
index 83e2f2578b..75f2215415 100644
--- a/samples/gpu/stereo_multi.cpp
+++ b/samples/gpu/stereo_multi.cpp
@@ -44,14 +44,14 @@ int main()
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 struct Worker { void operator()(int device_id) const; };
 
 // GPUs data
 GpuMat d_left[2];
 GpuMat d_right[2];
-Ptr<gpu::StereoBM> bm[2];
+Ptr<cuda::StereoBM> bm[2];
 GpuMat d_result[2];
 
 static void printHelp()
@@ -75,7 +75,7 @@ int main(int argc, char** argv)
     }
     for (int i = 0; i < num_devices; ++i)
     {
-        cv::gpu::printShortCudaDeviceInfo(i);
+        cv::cuda::printShortCudaDeviceInfo(i);
 
         DeviceInfo dev_info(i);
         if (!dev_info.isCompatible())
@@ -112,13 +112,13 @@ int main(int argc, char** argv)
     setDevice(0);
     d_left[0].upload(left.rowRange(0, left.rows / 2));
     d_right[0].upload(right.rowRange(0, right.rows / 2));
-    bm[0] = gpu::createStereoBM();
+    bm[0] = cuda::createStereoBM();
 
     // Split source images for processing on the GPU #1
     setDevice(1);
     d_left[1].upload(left.rowRange(left.rows / 2, left.rows));
     d_right[1].upload(right.rowRange(right.rows / 2, right.rows));
-    bm[1] = gpu::createStereoBM();
+    bm[1] = cuda::createStereoBM();
 
     // Execute calculation in two threads using two GPUs
     int devices[] = {0, 1};
diff --git a/samples/gpu/surf_keypoint_matcher.cpp b/samples/gpu/surf_keypoint_matcher.cpp
index fd3578d9f4..a442c44db7 100644
--- a/samples/gpu/surf_keypoint_matcher.cpp
+++ b/samples/gpu/surf_keypoint_matcher.cpp
@@ -12,7 +12,7 @@
 
 using namespace std;
 using namespace cv;
-using namespace cv::gpu;
+using namespace cv::cuda;
 
 static void help()
 {
@@ -48,7 +48,7 @@ int main(int argc, char* argv[])
         }
     }
 
-    cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
+    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
 
     SURF_GPU surf;
 
diff --git a/samples/gpu/video_reader.cpp b/samples/gpu/video_reader.cpp
index 42f6f91db4..13f8cb8081 100644
--- a/samples/gpu/video_reader.cpp
+++ b/samples/gpu/video_reader.cpp
@@ -24,12 +24,12 @@ int main(int argc, const char* argv[])
 
     cv::namedWindow("CPU", cv::WINDOW_NORMAL);
     cv::namedWindow("GPU", cv::WINDOW_OPENGL);
-    cv::gpu::setGlDevice();
+    cv::cuda::setGlDevice();
 
     cv::Mat frame;
     cv::VideoCapture reader(fname);
 
-    cv::gpu::GpuMat d_frame;
+    cv::cuda::GpuMat d_frame;
     cv::Ptr<cv::gpucodec::VideoReader> d_reader = cv::gpucodec::createVideoReader(fname);
 
     cv::TickMeter tm;