diff --git a/CMakeLists.txt b/CMakeLists.txt
index 23b79ec2b6..da82a9d908 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -708,47 +708,36 @@ if(WITH_CUDA)
         message(STATUS "CUDA detected: " ${CUDA_VERSION})
 
         set(CUDA_ARCH_GPU "1.3 2.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for")
-        set(CUDA_ARCH_PTX "1.1 1.3" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")       
+        set(CUDA_ARCH_PTX "1.1 1.3" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")              
         
-        # Architectures to be searched for in user's input
-        set (CUDA_ARCH_ALL 1.0 1.1 1.2 1.3 2.0 2.1)
+        # These variables are used in config templates
+        string(REGEX REPLACE "\\." "" ARCH_GPU_NO_POINTS "${CUDA_ARCH_GPU}")
+        string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")
         
-        # Parse user's input
-        foreach(ARCH IN LISTS CUDA_ARCH_ALL)            
-            string(REGEX MATCH ${ARCH} ARCH_GPU_MATCH "${CUDA_ARCH_GPU}") 
-            string(REGEX MATCH ${ARCH} ARCH_PTX_MATCH "${CUDA_ARCH_PTX}") 
-            string(REGEX REPLACE "\\." "" ARCH_GPU_AS_NUM "${ARCH_GPU_MATCH}")
-            string(REGEX REPLACE "\\." "" ARCH_PTX_AS_NUM "${ARCH_PTX_MATCH}")
-            
-            # Define variables indicating the architectures specified by user
-            if(NOT ${ARCH_GPU_AS_NUM} STREQUAL "")
-                set(OPENCV_ARCH_GPU_${ARCH_GPU_AS_NUM} 1)
-            endif()            
-            if(NOT ${ARCH_PTX_AS_NUM} STREQUAL "")
-                set(OPENCV_ARCH_PTX_${ARCH_PTX_AS_NUM} 1)
-            endif()
-        endforeach()       
+        # Check whether the user requested compute capability 1.0 (as GPU binary or PTX)
+        string(REGEX MATCH "1\\.0" HAS_ARCH_10 "${CUDA_ARCH_GPU} ${CUDA_ARCH_PTX}")
+        if(NOT "${HAS_ARCH_10}" STREQUAL "")
+            set(OPENCV_ARCH_GPU_OR_PTX_10 1)
+        endif()
         
         set(NVCC_FLAGS_EXTRA "")
         
         # Tell nvcc to add binaries for the specified GPUs
-        string(REGEX REPLACE "\\." "" CUDA_ARCH_GPU "${CUDA_ARCH_GPU}")
-        string(REGEX MATCHALL "[0-9]+" CUDA_ARCH_GPU_LIST "${CUDA_ARCH_GPU}")
-        foreach(ARCH_GPU IN LISTS CUDA_ARCH_GPU_LIST)
-            set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH_GPU},code=sm_${ARCH_GPU})
+        string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_GPU_NO_POINTS}")
+        foreach(ARCH IN LISTS ARCH_LIST)
+            set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=sm_${ARCH})
         endforeach()
         
         # Tell nvcc to add PTX intermediate code for the specified architectures
-        string(REGEX REPLACE "\\." "" CUDA_ARCH_PTX "${CUDA_ARCH_PTX}")
-        string(REGEX MATCHALL "[0-9]+" CUDA_ARCH_PTX_LIST "${CUDA_ARCH_PTX}")
-        foreach(ARCH_PTX IN LISTS CUDA_ARCH_PTX_LIST)
-            set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH_PTX},code=compute_${ARCH_PTX})
+        string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}")
+        foreach(ARCH IN LISTS ARCH_LIST)
+            set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH})
         endforeach()               
-             
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
-        message(STATUS "CUDA NVCC flags: ${CUDA_NVCC_FLAGS}")
         
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})        
         set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}")
+        
+        message(STATUS "CUDA NVCC flags: ${CUDA_NVCC_FLAGS}")
     endif()
 endif()
 
diff --git a/cvconfig.h.cmake b/cvconfig.h.cmake
index 053fde1e24..27681a3b2e 100644
--- a/cvconfig.h.cmake
+++ b/cvconfig.h.cmake
@@ -163,21 +163,14 @@
 /* NVidia Cuda Runtime API*/
 #cmakedefine HAVE_CUDA
 
-/* Compile for 'real' NVIDIA GPU architecture */
-#cmakedefine OPENCV_ARCH_GPU_10
-#cmakedefine OPENCV_ARCH_GPU_11
-#cmakedefine OPENCV_ARCH_GPU_12
-#cmakedefine OPENCV_ARCH_GPU_13
-#cmakedefine OPENCV_ARCH_GPU_20
-#cmakedefine OPENCV_ARCH_GPU_21
-
-/* Compile for 'virtual' NVIDIA PTX architecture */
-#cmakedefine OPENCV_ARCH_PTX_10
-#cmakedefine OPENCV_ARCH_PTX_11
-#cmakedefine OPENCV_ARCH_PTX_12
-#cmakedefine OPENCV_ARCH_PTX_13
-#cmakedefine OPENCV_ARCH_PTX_20
-#cmakedefine OPENCV_ARCH_PTX_21
+/* Compile for 'real' NVIDIA GPU architectures */
+#define OPENCV_ARCH_GPU "${ARCH_GPU_NO_POINTS}"
+
+/* Compile for 'virtual' NVIDIA PTX architectures */
+#define OPENCV_ARCH_PTX "${ARCH_PTX_NO_POINTS}"
+
+/* Create PTX or CUBIN for 1.0 compute capability */
+#cmakedefine OPENCV_ARCH_GPU_OR_PTX_10
 
 /* VideoInput library */
 #cmakedefine HAVE_VIDEOINPUT
diff --git a/doc/gpu_image_processing.tex b/doc/gpu_image_processing.tex
index 38e326b3eb..fe6a69ee8b 100644
--- a/doc/gpu_image_processing.tex
+++ b/doc/gpu_image_processing.tex
@@ -232,10 +232,10 @@ private:
 
 \cvCppFunc{gpu::ConvolveBuf::ConvolveBuf}
 
-\cvdefCpp{ConvolveBuf();}
+\cvdefCpp{ConvolveBuf::ConvolveBuf();}
 Constructs an empty buffer which will be properly resized after first call of the convolve function.
 
-\cvdefCpp{ConvolveBuf(Size image\_size, Size templ\_size);}
+\cvdefCpp{ConvolveBuf::ConvolveBuf(Size image\_size, Size templ\_size);}
 Constructs a buffer for the convolve function with respectively arguments.
 
 
diff --git a/doc/gpu_object_detection.tex b/doc/gpu_object_detection.tex
index e5cfb18cf7..46cca72fee 100644
--- a/doc/gpu_object_detection.tex
+++ b/doc/gpu_object_detection.tex
@@ -82,13 +82,13 @@ Creates HOG descriptor and detector.
 \cvCppFunc{gpu::HOGDescriptor::getDescriptorSize}
 Returns number of coefficients required for the classification.
 
-\cvdefCpp{size\_t getDescriptorSize() const;}
+\cvdefCpp{size\_t HOGDescriptor::getDescriptorSize() const;}
 
 
 \cvCppFunc{gpu::HOGDescriptor::getBlockHistogramSize}
 Returns block histogram size.
 
-\cvdefCpp{size\_t getBlockHistogramSize() const;}
+\cvdefCpp{size\_t HOGDescriptor::getBlockHistogramSize() const;}
 
 
 \cvCppFunc{gpu::HOGDescriptor::setSVMDetector}
@@ -100,25 +100,25 @@ Sets coefficients for the linear SVM classifier.
 \cvCppFunc{gpu::HOGDescriptor::getDefaultPeopleDetector}
 Returns coefficients of the classifier trained for people detection (for default window size).
 
-\cvdefCpp{static vector<float> getDefaultPeopleDetector();}
+\cvdefCpp{static vector<float> HOGDescriptor::getDefaultPeopleDetector();}
 
 
 \cvCppFunc{gpu::HOGDescriptor::getPeopleDetector48x96}
 Returns coefficients of the classifier trained for people detection (for 48x96 windows).
 
-\cvdefCpp{static vector<float> getPeopleDetector48x96();}
+\cvdefCpp{static vector<float> HOGDescriptor::getPeopleDetector48x96();}
 
 
 \cvCppFunc{gpu::HOGDescriptor::getPeopleDetector64x128}
 Returns coefficients of the classifier trained for people detection (for 64x128 windows).
 
-\cvdefCpp{static vector<float> getPeopleDetector64x128();}
+\cvdefCpp{static vector<float> HOGDescriptor::getPeopleDetector64x128();}
 
 
 \cvCppFunc{gpu::HOGDescriptor::detect}
 Perfroms object detection without multiscale window.
 
-\cvdefCpp{void detect(const GpuMat\& img, vector<Point>\& found\_locations,\par
+\cvdefCpp{void HOGDescriptor::detect(const GpuMat\& img, vector<Point>\& found\_locations,\par
              double hit\_threshold=0, Size win\_stride=Size(),\par
              Size padding=Size());}
 
@@ -134,10 +134,10 @@ Perfroms object detection without multiscale window.
 \cvCppFunc{gpu::HOGDescriptor::detectMultiScale}
 Perfroms object detection with multiscale window.
 
-\cvdefCpp{void detectMultiScale(const GpuMat\& img, vector<Rect>\& found\_locations,\par
-                      double hit\_threshold=0, Size win\_stride=Size(),\par
-                      Size padding=Size(), double scale0=1.05,\par
-                      int group\_threshold=2);}
+\cvdefCpp{void HOGDescriptor::detectMultiScale(const GpuMat\& img,\par
+  vector<Rect>\& found\_locations, double hit\_threshold=0,\par
+  Size win\_stride=Size(), Size padding=Size(),\par
+  double scale0=1.05, int group\_threshold=2);}
 
 \begin{description}
 \cvarg{img}{Source image. See \cvCppCross{gpu::HOGDescriptor::detect} for type limitations.}
@@ -154,9 +154,9 @@ See \cvCppCross{groupRectangles}.}
 \cvCppFunc{gpu::HOGDescriptor::getDescriptors}
 Returns block descriptors computed for the whole image. It's mainly used for classifier learning purposes.
 
-\cvdefCpp{void getDescriptors(const GpuMat\& img, Size win\_stride,\par
-                    GpuMat\& descriptors,\par
-                    int descr\_format=DESCR\_FORMAT\_COL\_BY\_COL);}
+\cvdefCpp{void HOGDescriptor::getDescriptors(const GpuMat\& img,\par
+  Size win\_stride, GpuMat\& descriptors,\par
+  int descr\_format=DESCR\_FORMAT\_COL\_BY\_COL);}
 
 \begin{description}
 \cvarg{img}{Source image. See \cvCppCross{gpu::HOGDescriptor::detect} for type limitations.}
diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp
index 10ee3b5b9d..d754c87364 100644
--- a/modules/gpu/src/initialization.cpp
+++ b/modules/gpu/src/initialization.cpp
@@ -41,6 +41,7 @@
 //M*/
 
 #include "precomp.hpp"
+#include <functional>
 
 using namespace cv;
 using namespace cv::gpu;
@@ -58,12 +59,12 @@ CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t& /*free*/, size_t& /*total*/)  { t
 CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int /*device*/) { throw_nogpu(); return false; }
 CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int /*device*/) { throw_nogpu(); return false; }
 CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor) { throw_nogpu(); return false; }
-CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) { throw_nogpu(); return false; }
-CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) { throw_nogpu(); return false; }
-CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) { throw_nogpu(); return false; }
-CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor) { throw_nogpu(); return false; }
-CV_EXPORTS bool cv::gpu::hasVersion(int major, int minor) { throw_nogpu(); return false; }
-CV_EXPORTS bool cv::gpu::hasGreaterOrEqualVersion(int major, int minor) { throw_nogpu(); return false; }
+CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) { return false; }
+CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) { return false; }
+CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) { return false; }
+CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor) { return false; }
+CV_EXPORTS bool cv::gpu::hasVersion(int major, int minor) { return false; }
+CV_EXPORTS bool cv::gpu::hasGreaterOrEqualVersion(int major, int minor) { return false; }
 CV_EXPORTS bool cv::gpu::isCompatibleWith(int device) { throw_nogpu(); return false; }
 
 
@@ -142,118 +143,55 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
 namespace 
 {
     template <typename Comparer>
-    bool checkPtxVersion(int major, int minor, Comparer cmp) 
+    bool compare(const std::string& str, int x, Comparer cmp)
     {
-#ifdef OPENCV_ARCH_PTX_10
-        if (cmp(1, 0, major, minor)) return true;
-#endif
-
-#ifdef OPENCV_ARCH_PTX_11
-        if (cmp(1, 1, major, minor)) return true;
-#endif
-
-#ifdef OPENCV_ARCH_PTX_12
-        if (cmp(1, 2, major, minor)) return true;
-#endif
-
-#ifdef OPENCV_ARCH_PTX_13
-        if (cmp(1, 3, major, minor)) return true;
-#endif
-
-#ifdef OPENCV_ARCH_PTX_20
-        if (cmp(2, 0, major, minor)) return true;
-#endif
-
-#ifdef OPENCV_ARCH_PTX_21
-        if (cmp(2, 1, major, minor)) return true;
-#endif
-
-        return false;
-    }
-
-    template <typename Comparer>
-    bool checkCubinVersion(int major, int minor, Comparer cmp) 
-    {
-#ifdef OPENCV_ARCH_GPU_10
-        if (cmp(1, 0, major, minor)) return true;
-#endif
-
-#ifdef OPENCV_ARCH_GPU_11
-        if (cmp(1, 1, major, minor)) return true;
-#endif
+        std::stringstream stream(str);
 
-#ifdef OPENCV_ARCH_GPU_12
-        if (cmp(1, 2, major, minor)) return true;
-#endif
-
-#ifdef OPENCV_ARCH_GPU_13
-        if (cmp(1, 3, major, minor)) return true;
-#endif
-
-#ifdef OPENCV_ARCH_GPU_20
-        if (cmp(2, 0, major, minor)) return true;
-#endif
-
-#ifdef OPENCV_ARCH_GPU_21
-        if (cmp(2, 1, major, minor)) return true;
-#endif
-
-        return false;
-    }
+        int val;
+        stream >> val;
 
-    struct ComparerEqual 
-    {
-        bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
-        {
-            return lhs1 == rhs1 && lhs2 == rhs2;
-        }
-    };
-
-    struct ComparerLessOrEqual
-    {
-        bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
+        while (!stream.fail())  // not eof(): eofbit is already set by the read that parsed the last value
         {
-            return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);
+            if (cmp(val, x))
+                return true;
+            stream >> val;
         }
-    };
 
-    struct ComparerGreaterOrEqual
-    {
-        bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
-        {
-            return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2);
-        }
-    };
+        return false;
+    }
 }
 
 
 CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor)
 {
-    return checkPtxVersion(major, minor, ComparerEqual());
+    return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, std::equal_to<int>());
 }
 
 
 CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor)
 {
-    return checkPtxVersion(major, minor, ComparerLessOrEqual());
+    return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, 
+                     std::less_equal<int>());
 }
 
 
 CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor)
 {
-    return checkPtxVersion(major, minor, ComparerGreaterOrEqual());
+    return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, 
+                     std::greater_equal<int>());
 }
 
 
 CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor)
 {
-    return checkCubinVersion(major, minor, ComparerEqual());
+    return ::compare(OPENCV_ARCH_GPU, major * 10 + minor, std::equal_to<int>());
 }
 
 
 CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor)
 {
-    return checkCubinVersion(major, minor, ComparerGreaterOrEqual());
+    return ::compare(OPENCV_ARCH_GPU, major * 10 + minor, 
+                     std::greater_equal<int>());
 }
 
 
@@ -284,7 +222,7 @@ CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)
         return true;
 
     // Check CUBIN compatibilty
-    for (int i = 0; i <= minor; ++i)
+    for (int i = minor; i >= 0; --i)
         if (hasCubinVersion(major, i))
             return true;
 
diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp
index a7ba6ffaba..03acb4a2bd 100644
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@@ -85,6 +85,10 @@
     #error "Insufficient NPP version, please update it."
 #endif
 
+#if defined(OPENCV_ARCH_GPU_OR_PTX_10)
+    #error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
+#endif
+
     static inline void throw_nogpu() { CV_Error(CV_GpuNotSupported, "The called functionality is disabled for current build or platform"); }
 
 #else /* defined(HAVE_CUDA) */