diff --git a/modules/core/include/opencv2/core/stream_accessor.hpp b/modules/core/include/opencv2/core/stream_accessor.hpp
index 6a1a0bddd5..30dcc6042b 100644
--- a/modules/core/include/opencv2/core/stream_accessor.hpp
+++ b/modules/core/include/opencv2/core/stream_accessor.hpp
@@ -43,7 +43,7 @@
 #ifndef __OPENCV_GPU_STREAM_ACCESSOR_HPP__
 #define __OPENCV_GPU_STREAM_ACCESSOR_HPP__
 
-#include "opencv2/gpu/gpu.hpp"
+#include "opencv2/core/gpumat.hpp"
 #include "cuda_runtime_api.h"
 
 namespace cv
diff --git a/modules/core/src/cudastream.cpp b/modules/core/src/cudastream.cpp
index a10807cf27..c22db87195 100644
--- a/modules/core/src/cudastream.cpp
+++ b/modules/core/src/cudastream.cpp
@@ -41,11 +41,13 @@
 //M*/
 
 #include "precomp.hpp"
+#include "opencv2/core/gpumat.hpp"
 
 using namespace cv;
 using namespace cv::gpu;
 
 #if !defined (HAVE_CUDA)
+#define throw_nogpu() CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")
 
 cv::gpu::Stream::Stream() { throw_nogpu(); }
 cv::gpu::Stream::~Stream() {}
@@ -70,7 +72,7 @@ void cv::gpu::Stream::release() { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
 
-#include "opencv2/gpu/stream_accessor.hpp"
+#include "opencv2/core/stream_accessor.hpp"
 
 namespace
 {
diff --git a/modules/core/src/matrix_operations.cpp b/modules/core/src/matrix_operations.cpp
index 3b82df5728..eace5181d9 100644
--- a/modules/core/src/matrix_operations.cpp
+++ b/modules/core/src/matrix_operations.cpp
@@ -181,12 +181,14 @@ bool cv::gpu::CudaMem::empty() const
 
 #if !defined (HAVE_CUDA)
+// Single failure path for the CUDA-less stubs below; same macro as the one defined in cudastream.cpp.
+#define throw_nogpu() CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")
 
 void cv::gpu::registerPageLocked(Mat&) { throw_nogpu(); }
 void cv::gpu::unregisterPageLocked(Mat&) { throw_nogpu(); }
 void cv::gpu::CudaMem::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/) { throw_nogpu(); }
 bool cv::gpu::CudaMem::canMapHostMemory() { throw_nogpu(); return false; }
 void cv::gpu::CudaMem::release() { throw_nogpu(); }
 GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_nogpu(); return GpuMat(); }
 
 #else /* !defined (HAVE_CUDA) */
 #include <cuda_runtime_api.h>
diff --git a/modules/softcascade/CMakeLists.txt b/modules/softcascade/CMakeLists.txt
index ee6e89299b..0d0d6fecb6 100644
--- a/modules/softcascade/CMakeLists.txt
+++ b/modules/softcascade/CMakeLists.txt
@@ -9,7 +9,7 @@ macro(ocv_glob_cuda_powered_module_sources)
   set(lib_device_hdrs "")
 
   if (HAVE_CUDA AND lib_device_srcs)
-  ocv_include_directories(${CUDA_INCLUDE_DIRS} "${OpenCV_SOURCE_DIR}/modules/gpu/include")
+  ocv_include_directories(${CUDA_INCLUDE_DIRS})
   file(GLOB_RECURSE lib_device_hdrs "src/cuda/*.hpp")
 
   ocv_cuda_compile(device_objs ${lib_device_srcs})
@@ -40,6 +40,8 @@ ocv_add_module(softcascade opencv_core opencv_imgproc opencv_ml OPTIONAL ${cuda_
 if(HAVE_CUDA)
   ocv_module_include_directories(${CUDA_INCLUDE_DIRS})
   ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
+else()
+  ocv_module_include_directories()
 endif()
 
 ocv_glob_cuda_powered_module_sources()
diff --git a/modules/softcascade/src/cuda/icf-sc.cu b/modules/softcascade/src/cuda/icf-sc.cu
index 19b20db9c5..cb2f4c8cde 100644
--- a/modules/softcascade/src/cuda/icf-sc.cu
+++ b/modules/softcascade/src/cuda/icf-sc.cu
@@ -40,13 +40,28 @@
 //
 //M*/
 
-#include <opencv2/gpu/device/common.hpp>
-#include <opencv2/gpu/device/saturate_cast.hpp>
-
 #include <cuda_invoker.hpp>
 #include <float.h>
 #include <stdio.h>
 
+namespace
+{
+#if defined(__GNUC__)
+    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
+#else /* defined(__CUDACC__) || defined(__MSVC__) */
+    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__)
+#endif
+    // Checks the status of a CUDA runtime call; a failure is reported on stderr together with the call site.
+    inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
+    {
+        if (cudaSuccess != err) fprintf(stderr, "CUDA error: %s (%s:%d %s)\n", cudaGetErrorString(err), file, line, func);
+    }
+}
+
+#ifndef CV_PI
+    #define CV_PI   3.1415926535897932384626433832795
+#endif
+
 namespace cv { namespace softcascade { namespace device {
 
 typedef unsigned char uchar;
@@ -126,7 +141,7 @@ typedef unsigned char uchar;
         luvg[luvgPitch * (y + 2 * 480) + x] = v;
     }
 
-    void bgr2Luv(const PtrStepSzb& bgr, PtrStepSzb luv)
+    void bgr2Luv(const cv::gpu::PtrStepSzb& bgr, cv::gpu::PtrStepSzb luv)
     {
         dim3 block(32, 8);
         dim3 grid(bgr.cols / 32, bgr.rows / 8);
@@ -208,7 +223,7 @@ typedef unsigned char uchar;
     texture<uchar,  cudaTextureType2D, cudaReadModeElementType> tgray;
 
     template<bool isDefaultNum>
-    __global__ void gray2hog(PtrStepSzb mag)
+    __global__ void gray2hog(cv::gpu::PtrStepSzb mag)
     {
         const int x = blockIdx.x * blockDim.x + threadIdx.x;
         const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -223,7 +238,7 @@ typedef unsigned char uchar;
         mag( 480 * fast_angle_bin<isDefaultNum>(dy, dx) + y, x) = cmag;
     }
 
-    void gray2hog(const PtrStepSzb& gray, PtrStepSzb mag, const int bins)
+    void gray2hog(const cv::gpu::PtrStepSzb& gray, cv::gpu::PtrStepSzb mag, const int bins)
     {
         dim3 block(32, 8);
         dim3 grid(gray.cols / 32, gray.rows / 8);
@@ -326,8 +341,8 @@ typedef unsigned char uchar;
         }
     }
 
-    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections,
-        PtrStepSzb suppressed, cudaStream_t stream)
+    void suppress(const cv::gpu::PtrStepSzb& objects, cv::gpu::PtrStepSzb overlaps, cv::gpu::PtrStepSzi ndetections,
+        cv::gpu::PtrStepSzb suppressed, cudaStream_t stream)
     {
         int block = 192;
         int grid = 1;
@@ -529,8 +544,8 @@ __global__ void soft_cascade(const CascadeInvoker<Policy> invoker, Detection* ob
 }
 
 template<typename Policy>
-void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
-    PtrStepSz<uchar4> objects, const int downscales, const cudaStream_t& stream) const
+void CascadeInvoker<Policy>::operator()(const cv::gpu::PtrStepSzb& roi, const cv::gpu::PtrStepSzi& hogluv,
+    cv::gpu::PtrStepSz<uchar4> objects, const int downscales, const cudaStream_t& stream) const
 {
     int fw = roi.rows;
     int fh = roi.cols;
@@ -562,7 +577,7 @@ void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi&
     }
 }
 
-template void CascadeInvoker<GK107PolicyX4>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
-    PtrStepSz<uchar4> objects, const int downscales, const cudaStream_t& stream) const;
+template void CascadeInvoker<GK107PolicyX4>::operator()(const cv::gpu::PtrStepSzb& roi, const cv::gpu::PtrStepSzi& hogluv,
+    cv::gpu::PtrStepSz<uchar4> objects, const int downscales, const cudaStream_t& stream) const;
 
 }}}
diff --git a/modules/softcascade/src/cuda_invoker.hpp b/modules/softcascade/src/cuda_invoker.hpp
index 958850f0c6..dfce0ba097 100644
--- a/modules/softcascade/src/cuda_invoker.hpp
+++ b/modules/softcascade/src/cuda_invoker.hpp
@@ -44,9 +44,9 @@
 #ifndef __OPENCV_ICF_HPP__
 #define __OPENCV_ICF_HPP__
 
-#include <opencv2/gpu/device/common.hpp>
-
-using namespace cv::gpu::device;
+// Replaces <opencv2/gpu/device/common.hpp>: only core-module and CUDA runtime headers are included here.
+#include "opencv2/core/cuda_devptrs.hpp"
+#include "cuda_runtime_api.h"
 
 #if defined __CUDACC__
 # define __device_inline__ __device__ __forceinline__
@@ -57,6 +57,8 @@ using namespace cv::gpu::device;
 
 namespace cv { namespace softcascade { namespace device {
 
+typedef unsigned char uchar;
+
 struct Octave
 {
     ushort index;
diff --git a/modules/softcascade/src/detector_cuda.cpp b/modules/softcascade/src/detector_cuda.cpp
index a013a16fe6..07d4535766 100644
--- a/modules/softcascade/src/detector_cuda.cpp
+++ b/modules/softcascade/src/detector_cuda.cpp
@@ -41,9 +41,9 @@
 //M*/
 
 #include "precomp.hpp"
-#include "opencv2/gpu/stream_accessor.hpp"
 
 #if !defined (HAVE_CUDA)
+#define throw_nogpu() CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")
 cv::softcascade::SCascade::SCascade(const double, const double, const int, const int) { throw_nogpu(); }
 
 cv::softcascade::SCascade::~SCascade() { throw_nogpu(); }
@@ -54,14 +54,28 @@ void cv::softcascade::SCascade::detect(InputArray, InputArray, OutputArray, cv::
 
 void cv::softcascade::SCascade::read(const FileNode& fn) { Algorithm::read(fn); }
 
-cv::gpu::ChannelsProcessor::ChannelsProcessor() { throw_nogpu(); }
- cv::gpu::ChannelsProcessor::~ChannelsProcessor() { throw_nogpu(); }
+cv::softcascade::ChannelsProcessor::ChannelsProcessor() { throw_nogpu(); }
+cv::softcascade::ChannelsProcessor::~ChannelsProcessor() {}  // must not throw; the constructor already reports missing CUDA support
 
-cv::Ptr<cv::gpu::ChannelsProcessor> cv::gpu::ChannelsProcessor::create(const int, const int, const int)
-{ throw_nogpu(); return cv::Ptr<cv::gpu::ChannelsProcessor>(0); }
+cv::Ptr<cv::softcascade::ChannelsProcessor> cv::softcascade::ChannelsProcessor::create(const int, const int, const int)
+{ throw_nogpu(); return cv::Ptr<cv::softcascade::ChannelsProcessor>(0); }
 
 #else
 # include "cuda_invoker.hpp"
+# include "opencv2/core/stream_accessor.hpp"
+namespace
+{
+#if defined(__GNUC__)
+    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
+#else /* defined(__CUDACC__) || defined(__MSVC__) */
+    #define cudaSafeCall(expr)  ___cudaSafeCall(expr, __FILE__, __LINE__)
+#endif
+    // Checks the status of a CUDA runtime call; on failure raises cv::Exception (CV_GpuApiCallError) carrying the call site.
+    inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
+    {
+        if (cudaSuccess != err) cv::error(cv::Exception(CV_GpuApiCallError, cudaGetErrorString(err), func, file, line));
+    }
+}
 
 cv::softcascade::device::Level::Level(int idx, const Octave& oct, const float scale, const int w, const int h)
 :  octave(idx), step(oct.stages), relScale(scale / oct.scale)
diff --git a/modules/softcascade/test/test_cuda_softcascade.cpp b/modules/softcascade/test/test_cuda_softcascade.cpp
index f97a26ad30..139c608560 100644
--- a/modules/softcascade/test/test_cuda_softcascade.cpp
+++ b/modules/softcascade/test/test_cuda_softcascade.cpp
@@ -43,6 +43,8 @@
 #include "test_precomp.hpp"
 #include "opencv2/core/gpumat.hpp"
 
+
+#ifdef HAVE_CUDA
 using std::tr1::get;
 
 // show detection results on input image with cv::imshow
@@ -210,7 +212,7 @@ TEST_P(SCascadeTestRoi, Detect)
 }
 
 INSTANTIATE_TEST_CASE_P(cuda_accelerated, SCascadeTestRoi, testing::Combine(
-    testing::ValuesIn(DeviceManager::instance().values()),
+    ALL_DEVICES,
     testing::Values(std::string("cascades/inria_caltech-17.01.2013.xml"),
                     std::string("cascades/sc_cvpr_2012_to_opencv_new_format.xml")),
     testing::Values(std::string("images/image_00000000_0.png")),
@@ -309,4 +311,6 @@ TEST_P(SCascadeTestAll, detectStream)
 
 INSTANTIATE_TEST_CASE_P(cuda_accelerated, SCascadeTestAll, testing::Combine( ALL_DEVICES,
                     testing::Values(Fixture("cascades/inria_caltech-17.01.2013.xml", 7),
-                                    Fixture("cascades/sc_cvpr_2012_to_opencv_new_format.xml", 1291))));
\ No newline at end of file
+                                    Fixture("cascades/sc_cvpr_2012_to_opencv_new_format.xml", 1291))));
+
+#endif // HAVE_CUDA
diff --git a/modules/softcascade/test/utility.hpp b/modules/softcascade/test/utility.hpp
index e6b840c534..2018a156eb 100644
--- a/modules/softcascade/test/utility.hpp
+++ b/modules/softcascade/test/utility.hpp
@@ -52,6 +52,8 @@
 //! return true if device supports specified feature and gpu module was built with support the feature.
 bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature);
 
+
+#if defined(HAVE_CUDA)
 class DeviceManager
 {
 public:
@@ -66,8 +68,9 @@ private:
     std::vector<cv::gpu::DeviceInfo> devices_;
     DeviceManager() {loadAll();}
 };
-
-#define ALL_DEVICES testing::ValuesIn(DeviceManager::instance().values())
-
+# define ALL_DEVICES testing::ValuesIn(DeviceManager::instance().values())
+#else
+# define ALL_DEVICES testing::ValuesIn(std::vector<cv::gpu::DeviceInfo>())
+#endif
 
 #endif // __OPENCV_GPU_TEST_UTILITY_HPP__