refactored gpu info structures (TargetArchs and DeviceInfo)

now DeviceInfo provides full information about device (from cudaDeviceProp)
12 years ago · 2dab93c2e8
parent 76f4b02b06
commit 2dab93c2e8
13 changed files with 929 additions and 186 deletions
--- a/modules/core/include/opencv2/core/gpu.hpp
+++ b/modules/core/include/opencv2/core/gpu.hpp
@ -392,17 +392,17 @@ private:

 //////////////////////////////// Initialization & Info ////////////////////////

-//! This is the only function that do not throw exceptions if the library is compiled without Cuda.
+//! this is the only function that does not throw exceptions if the library is compiled without CUDA
 CV_EXPORTS int getCudaEnabledDeviceCount();

-//! Functions below throw cv::Expception if the library is compiled without Cuda.
-
+//! set device to be used for GPU executions for the calling host thread
 CV_EXPORTS void setDevice(int device);

+//! returns which device is currently being used for the calling host thread
 CV_EXPORTS int getDevice();

-//! Explicitly destroys and cleans up all resources associated with the current device in the current process.
-//! Any subsequent API call to this device will reinitialize the device.
+//! explicitly destroys and cleans up all resources associated with the current device in the current process
+//! any subsequent API call to this device will reinitialize the device
 CV_EXPORTS void resetDevice();

 enum FeatureSet
@ -423,75 +423,218 @@ enum FeatureSet
    DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
 };

-// Checks whether current device supports the given feature
+//! checks whether current device supports the given feature
 CV_EXPORTS bool deviceSupports(FeatureSet feature_set);

-// Gives information about what GPU archs this OpenCV GPU module was
-// compiled for
+//! information about what GPU archs this OpenCV GPU module was compiled for
 class CV_EXPORTS TargetArchs
 {
 public:
+    //! NOTE(review): presumably true if the module was built for the given feature set - confirm against the implementation
    static bool builtWith(FeatureSet feature_set);
+
+    //! has() is hasPtx() || hasBin() for the given compute capability version
    static bool has(int major, int minor);
    static bool hasPtx(int major, int minor);
    static bool hasBin(int major, int minor);
+
    static bool hasEqualOrLessPtx(int major, int minor);
+    //! hasEqualOrGreater() is hasEqualOrGreaterPtx() || hasEqualOrGreaterBin()
    static bool hasEqualOrGreater(int major, int minor);
    static bool hasEqualOrGreaterPtx(int major, int minor);
    static bool hasEqualOrGreaterBin(int major, int minor);
-private:
-    TargetArchs();
 };

-// Gives information about the given GPU
+//! information about the given GPU.
 class CV_EXPORTS DeviceInfo
 {
 public:
-    // Creates DeviceInfo object for the current GPU
-    DeviceInfo() : device_id_(getDevice()) { query(); }
+    //! creates DeviceInfo object for the current GPU
+    DeviceInfo();

-    // Creates DeviceInfo object for the given GPU
-    DeviceInfo(int device_id) : device_id_(device_id) { query(); }
+    //! creates DeviceInfo object for the given GPU
+    DeviceInfo(int device_id);

-    String name() const { return name_; }
+    //! device number.
+    int deviceID() const;

-    // Return compute capability versions
-    int majorVersion() const { return majorVersion_; }
-    int minorVersion() const { return minorVersion_; }
+    //! ASCII string identifying device
+    const char* name() const;

-    int multiProcessorCount() const { return multi_processor_count_; }
+    //! global memory available on device in bytes
+    size_t totalGlobalMem() const;

+    //! shared memory available per block in bytes
    size_t sharedMemPerBlock() const;

+    //! 32-bit registers available per block
+    int regsPerBlock() const;
+
+    //! warp size in threads
+    int warpSize() const;
+
+    //! maximum pitch in bytes allowed by memory copies
+    size_t memPitch() const;
+
+    //! maximum number of threads per block
+    int maxThreadsPerBlock() const;
+
+    //! maximum size of each dimension of a block
+    Vec3i maxThreadsDim() const;
+
+    //! maximum size of each dimension of a grid
+    Vec3i maxGridSize() const;
+
+    //! clock frequency in kilohertz
+    int clockRate() const;
+
+    //! constant memory available on device in bytes
+    size_t totalConstMem() const;
+
+    //! major compute capability
+    int major() const;
+
+    //! minor compute capability
+    int minor() const;
+
+    //! alignment requirement for textures
+    size_t textureAlignment() const;
+
+    //! pitch alignment requirement for texture references bound to pitched memory
+    size_t texturePitchAlignment() const;
+
+    //! number of multiprocessors on device
+    int multiProcessorCount() const;
+
+    //! specifies whether there is a run time limit on kernels
+    bool kernelExecTimeoutEnabled() const;
+
+    //! device is integrated as opposed to discrete
+    bool integrated() const;
+
+    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
+    bool canMapHostMemory() const;
+
+    enum ComputeMode
+    {
+        ComputeModeDefault,         /**< default compute mode (Multiple threads can use ::cudaSetDevice() with this device) */
+        ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use ::cudaSetDevice() with this device) */
+        ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use ::cudaSetDevice() with this device) */
+        ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use ::cudaSetDevice() with this device) */
+    };
+
+    //! compute mode
+    ComputeMode computeMode() const;
+
+    //! maximum 1D texture size
+    int maxTexture1D() const;
+
+    //! maximum 1D mipmapped texture size
+    int maxTexture1DMipmap() const;
+
+    //! maximum size for 1D textures bound to linear memory
+    int maxTexture1DLinear() const;
+
+    //! maximum 2D texture dimensions
+    Vec2i maxTexture2D() const;
+
+    //! maximum 2D mipmapped texture dimensions
+    Vec2i maxTexture2DMipmap() const;
+
+    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
+    Vec3i maxTexture2DLinear() const;
+
+    //! maximum 2D texture dimensions if texture gather operations have to be performed
+    Vec2i maxTexture2DGather() const;
+
+    //! maximum 3D texture dimensions
+    Vec3i maxTexture3D() const;
+
+    //! maximum Cubemap texture dimensions
+    int maxTextureCubemap() const;
+
+    //! maximum 1D layered texture dimensions
+    Vec2i maxTexture1DLayered() const;
+
+    //! maximum 2D layered texture dimensions
+    Vec3i maxTexture2DLayered() const;
+
+    //! maximum Cubemap layered texture dimensions
+    Vec2i maxTextureCubemapLayered() const;
+
+    //! maximum 1D surface size
+    int maxSurface1D() const;
+
+    //! maximum 2D surface dimensions
+    Vec2i maxSurface2D() const;
+
+    //! maximum 3D surface dimensions
+    Vec3i maxSurface3D() const;
+
+    //! maximum 1D layered surface dimensions
+    Vec2i maxSurface1DLayered() const;
+
+    //! maximum 2D layered surface dimensions
+    Vec3i maxSurface2DLayered() const;
+
+    //! maximum Cubemap surface dimensions
+    int maxSurfaceCubemap() const;
+
+    //! maximum Cubemap layered surface dimensions
+    Vec2i maxSurfaceCubemapLayered() const;
+
+    //! alignment requirements for surfaces
+    size_t surfaceAlignment() const;
+
+    //! device can possibly execute multiple kernels concurrently
+    bool concurrentKernels() const;
+
+    //! device has ECC support enabled
+    bool ECCEnabled() const;
+
+    //! PCI bus ID of the device
+    int pciBusID() const;
+
+    //! PCI device ID of the device
+    int pciDeviceID() const;
+
+    //! PCI domain ID of the device
+    int pciDomainID() const;
+
+    //! true if device is a Tesla device using TCC driver, false otherwise
+    bool tccDriver() const;
+
+    //! number of asynchronous engines
+    int asyncEngineCount() const;
+
+    //! device shares a unified address space with the host
+    bool unifiedAddressing() const;
+
+    //! peak memory clock frequency in kilohertz
+    int memoryClockRate() const;
+
+    //! global memory bus width in bits
+    int memoryBusWidth() const;
+
+    //! size of L2 cache in bytes
+    int l2CacheSize() const;
+
+    //! maximum resident threads per multiprocessor
+    int maxThreadsPerMultiProcessor() const;
+
+    //! gets free and total device memory
    void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
    size_t freeMemory() const;
    size_t totalMemory() const;

-    // Checks whether device supports the given feature
+    //! checks whether device supports the given feature
    bool supports(FeatureSet feature_set) const;

-    // Checks whether the GPU module can be run on the given device
+    //! checks whether the GPU module can be run on the given device
    bool isCompatible() const;

-    bool canMapHostMemory() const;
-
-    size_t textureAlignment() const;
-
-    int deviceID() const { return device_id_; }
-
 private:
-    void query();
-
    int device_id_;
-
-    String name_;
-    int multi_processor_count_;
-    int majorVersion_;
-    int minorVersion_;
 };

 CV_EXPORTS void printCudaDeviceInfo(int device);
-
 CV_EXPORTS void printShortCudaDeviceInfo(int device);

 }} // namespace cv { namespace gpu {
--- a/modules/core/include/opencv2/core/gpu.inl.hpp
+++ b/modules/core/include/opencv2/core/gpu.inl.hpp
@ -567,6 +567,62 @@ Stream::Stream(const Ptr<Impl>& impl)
 {
 }

+//////////////////////////////// Initialization & Info ////////////////////////
+
+//! true when hasPtx() or hasBin() accepts the given version (hasPtx() is tried first)
+inline bool TargetArchs::has(int major, int minor)
+{
+    return hasPtx(major, minor) ? true : hasBin(major, minor);
+}
+
+//! true when hasEqualOrGreaterPtx() or hasEqualOrGreaterBin() accepts the given version (PTX is tried first)
+inline bool TargetArchs::hasEqualOrGreater(int major, int minor)
+{
+    return hasEqualOrGreaterPtx(major, minor) ? true : hasEqualOrGreaterBin(major, minor);
+}
+
+//! binds the object to the device returned by getDevice()
+inline DeviceInfo::DeviceInfo()
+    : device_id_(getDevice())
+{
+}
+
+//! binds the object to device_id; asserts that it lies in [0, getCudaEnabledDeviceCount())
+inline DeviceInfo::DeviceInfo(int device_id)
+    : device_id_(device_id)
+{
+    CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
+}
+
+//! device number this object was constructed for
+inline int DeviceInfo::deviceID() const
+{
+    return device_id_;
+}
+
+//! free-memory value from queryMemory(); the total is fetched as well but discarded
+inline size_t DeviceInfo::freeMemory() const
+{
+    size_t total, available;
+    queryMemory(total, available);
+    return available;
+}
+
+//! total-memory value from queryMemory(); the free amount is fetched as well but discarded
+inline size_t DeviceInfo::totalMemory() const
+{
+    size_t total, available;
+    queryMemory(total, available);
+    return total;
+}
+
+//! compares the compute capability, encoded as major * 10 + minor, against feature_set
+inline bool DeviceInfo::supports(FeatureSet feature_set) const
+{
+    const int capability = major() * 10 + minor();
+    return capability >= feature_set;
+}
+
 }} // namespace cv { namespace gpu {

 //////////////////////////////// Mat ////////////////////////////////
--- a/modules/core/src/gpu_info.cpp
+++ b/modules/core/src/gpu_info.cpp
@ -41,50 +41,17 @@
 //M*/

 #include "precomp.hpp"
-#include <limits>

 using namespace cv;
 using namespace cv::gpu;

-//////////////////////////////// Initialization & Info ////////////////////////
-
-#ifndef HAVE_CUDA
-
-int cv::gpu::getCudaEnabledDeviceCount() { return 0; }
-
-void cv::gpu::setDevice(int) { throw_no_cuda(); }
-int cv::gpu::getDevice() { throw_no_cuda(); return 0; }
-
-void cv::gpu::resetDevice() { throw_no_cuda(); }
-
-bool cv::gpu::deviceSupports(FeatureSet) { throw_no_cuda(); return false; }
-
-bool cv::gpu::TargetArchs::builtWith(FeatureSet) { throw_no_cuda(); return false; }
-bool cv::gpu::TargetArchs::has(int, int) { throw_no_cuda(); return false; }
-bool cv::gpu::TargetArchs::hasPtx(int, int) { throw_no_cuda(); return false; }
-bool cv::gpu::TargetArchs::hasBin(int, int) { throw_no_cuda(); return false; }
-bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int, int) { throw_no_cuda(); return false; }
-bool cv::gpu::TargetArchs::hasEqualOrGreater(int, int) { throw_no_cuda(); return false; }
-bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int, int) { throw_no_cuda(); return false; }
-bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int, int) { throw_no_cuda(); return false; }
-
-size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { throw_no_cuda(); return 0; }
-void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_no_cuda(); }
-size_t cv::gpu::DeviceInfo::freeMemory() const { throw_no_cuda(); return 0; }
-size_t cv::gpu::DeviceInfo::totalMemory() const { throw_no_cuda(); return 0; }
-bool cv::gpu::DeviceInfo::supports(FeatureSet) const { throw_no_cuda(); return false; }
-bool cv::gpu::DeviceInfo::isCompatible() const { throw_no_cuda(); return false; }
-void cv::gpu::DeviceInfo::query() { throw_no_cuda(); }
-
-void cv::gpu::printCudaDeviceInfo(int) { throw_no_cuda(); }
-void cv::gpu::printShortCudaDeviceInfo(int) { throw_no_cuda(); }
-
-#else // HAVE_CUDA
-
 int cv::gpu::getCudaEnabledDeviceCount()
 {
+#ifndef HAVE_CUDA
+    return 0;
+#else
    int count;
-    cudaError_t error = cudaGetDeviceCount( &count );
+    cudaError_t error = cudaGetDeviceCount(&count);

    if (error == cudaErrorInsufficientDriver)
        return -1;
@ -94,25 +61,78 @@ int cv::gpu::getCudaEnabledDeviceCount()

    cudaSafeCall( error );
    return count;
+#endif
 }

 void cv::gpu::setDevice(int device)
 {
-    cudaSafeCall( cudaSetDevice( device ) );
+#ifdef HAVE_CUDA
+    cudaSafeCall( cudaSetDevice(device) );
+#else // no CUDA support compiled in
+    (void) device;
+    throw_no_cuda();
+#endif // HAVE_CUDA
 }

 int cv::gpu::getDevice()
 {
+#ifdef HAVE_CUDA
    int device;
-    cudaSafeCall( cudaGetDevice( &device ) );
+    cudaSafeCall( cudaGetDevice(&device) );
    return device;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
 }

 void cv::gpu::resetDevice()
 {
+#ifdef HAVE_CUDA
    cudaSafeCall( cudaDeviceReset() );
+#else // no CUDA support compiled in
+    throw_no_cuda();
+#endif // HAVE_CUDA
+}
+
+bool cv::gpu::deviceSupports(FeatureSet feature_set)
+{
+#ifdef HAVE_CUDA
+    // compute capability (major * 10 + minor) remembered per device id; -1 = not queried yet
+    static int versions[] =
+    {
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+    };
+    static const int cache_size = static_cast<int>(sizeof(versions) / sizeof(versions[0]));
+
+    const int devId = getDevice();
+    const bool cacheable = devId < cache_size;
+
+    int version = cacheable ? versions[devId] : -1;
+
+    if (version < 0)
+    {
+        // nothing remembered yet, or the device id does not fit the table: ask DeviceInfo
+        const DeviceInfo dev(devId);
+        version = dev.major() * 10 + dev.minor();
+
+        if (cacheable)
+            versions[devId] = version;
+    }
+
+    return TargetArchs::builtWith(feature_set) && (version >= feature_set);
+#else // no CUDA support compiled in
+    (void) feature_set;
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
 }

+////////////////////////////////////////////////////////////////////////
+// TargetArchs
+
+#ifdef HAVE_CUDA
+
 namespace
 {
    class CudaArch
@ -128,7 +148,7 @@ namespace
        bool hasEqualOrGreaterBin(int major, int minor) const;

    private:
-        static void fromStr(const String& set_as_str, std::vector<int>& arr);
+        static void fromStr(const char* set_as_str, std::vector<int>& arr);

        std::vector<int> bin;
        std::vector<int> ptx;
@ -174,12 +194,14 @@ namespace
        return !bin.empty() && (bin.back() >= major * 10 + minor);
    }

-    void CudaArch::fromStr(const String& set_as_str, std::vector<int>& arr)
+    void CudaArch::fromStr(const char* set_as_str, std::vector<int>& arr)
    {
        arr.clear();

+        const size_t len = strlen(set_as_str);
+
        size_t pos = 0;
-        while (pos < set_as_str.size())
+        while (pos < len)
        {
            if (isspace(set_as_str[pos]))
            {
@ -189,8 +211,8 @@ namespace
            {
                int cur_value;
                int chars_read;
-                int args_read = sscanf(set_as_str.c_str() + pos, "%d%n", &cur_value, &chars_read);
-                CV_Assert(args_read == 1);
+                int args_read = sscanf(set_as_str + pos, "%d%n", &cur_value, &chars_read);
+                CV_Assert( args_read == 1 );

                arr.push_back(cur_value);
                pos += chars_read;
@ -201,70 +223,83 @@ namespace
    }
 }

+#endif
+
 bool cv::gpu::TargetArchs::builtWith(cv::gpu::FeatureSet feature_set)
 {
+#ifdef HAVE_CUDA
    return cudaArch.builtWith(feature_set);
-}
-
-bool cv::gpu::TargetArchs::has(int major, int minor)
-{
-    return hasPtx(major, minor) || hasBin(major, minor);
+#else // no CUDA support compiled in
+    (void) feature_set;
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
 }

 bool cv::gpu::TargetArchs::hasPtx(int major, int minor)
 {
+#ifdef HAVE_CUDA
    return cudaArch.hasPtx(major, minor);
+#else // no CUDA support compiled in
+    (void) major;
+    (void) minor;
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
 }

 bool cv::gpu::TargetArchs::hasBin(int major, int minor)
 {
+#ifdef HAVE_CUDA
    return cudaArch.hasBin(major, minor);
+#else // no CUDA support compiled in
+    (void) major;
+    (void) minor;
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
 }

 bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor)
 {
+#ifdef HAVE_CUDA
    return cudaArch.hasEqualOrLessPtx(major, minor);
-}
-
-bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor)
-{
-    return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
+#else // no CUDA support compiled in
+    (void) major;
+    (void) minor;
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
 }

 bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
 {
+#ifdef HAVE_CUDA
    return cudaArch.hasEqualOrGreaterPtx(major, minor);
+#else // no CUDA support compiled in
+    (void) major;
+    (void) minor;
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
 }

 bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
 {
+#ifdef HAVE_CUDA
    return cudaArch.hasEqualOrGreaterBin(major, minor);
+#else // no CUDA support compiled in
+    (void) major;
+    (void) minor;
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
 }

-bool cv::gpu::deviceSupports(FeatureSet feature_set)
-{
-    static int versions[] =
-    {
-        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
-    };
-    static const int cache_size = static_cast<int>(sizeof(versions) / sizeof(versions[0]));
-
-    const int devId = getDevice();
-
-    int version;
-
-    if (devId < cache_size && versions[devId] >= 0)
-        version = versions[devId];
-    else
-    {
-        DeviceInfo dev(devId);
-        version = dev.majorVersion() * 10 + dev.minorVersion();
-        if (devId < cache_size)
-            versions[devId] = version;
-    }
+////////////////////////////////////////////////////////////////////////
+// DeviceInfo

-    return TargetArchs::builtWith(feature_set) && (version >= feature_set);
-}
+#ifdef HAVE_CUDA

 namespace
 {
@ -272,116 +307,613 @@ namespace
    {
    public:
        DeviceProps();
-        ~DeviceProps();

-        cudaDeviceProp* get(int devID);
+        const cudaDeviceProp* get(int devID) const;

    private:
-        std::vector<cudaDeviceProp*> props_;
+        std::vector<cudaDeviceProp> props_;
    };

    DeviceProps::DeviceProps()
    {
-        props_.resize(10, 0);
-    }
+        // fetch the properties of every CUDA device once; a non-positive count leaves props_ empty
+        int count = getCudaEnabledDeviceCount();

-    DeviceProps::~DeviceProps()
+        if (count > 0)
        {
-        for (size_t i = 0; i < props_.size(); ++i)
+            props_.resize(count);
+
+            for (int devID = 0; devID < count; ++devID)
            {
-            if (props_[i])
-                delete props_[i];
+                cudaSafeCall( cudaGetDeviceProperties(&props_[devID], devID) );
+            }
        }
-        props_.clear();
    }

-    cudaDeviceProp* DeviceProps::get(int devID)
+    const cudaDeviceProp* DeviceProps::get(int devID) const
    {
-        if (devID >= (int) props_.size())
-            props_.resize(devID + 5, 0);
+        // a negative devID turns into a huge size_t after the cast, so this single check rejects it too
+        CV_Assert( static_cast<size_t>(devID) < props_.size() );

-        if (!props_[devID])
-        {
-            props_[devID] = new cudaDeviceProp;
-            cudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
+        return &props_[devID];
    }

-        return props_[devID];
+    DeviceProps& deviceProps()
+    {
+        // function-local static: the table is built on the first call and the same object is returned afterwards
+        static DeviceProps props;
+        return props;
    }
+}

-    DeviceProps deviceProps;
+#endif
+
+const char* cv::gpu::DeviceInfo::name() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->name;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return "";
+#endif // HAVE_CUDA
+}
+
+size_t cv::gpu::DeviceInfo::totalGlobalMem() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->totalGlobalMem;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
 }

 size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const
 {
-    return deviceProps.get(device_id_)->sharedMemPerBlock;
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->sharedMemPerBlock;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
 }

-bool cv::gpu::DeviceInfo::canMapHostMemory() const
+int cv::gpu::DeviceInfo::regsPerBlock() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->regsPerBlock;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::warpSize() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->warpSize;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+size_t cv::gpu::DeviceInfo::memPitch() const
 {
-    return deviceProps.get(device_id_)->canMapHostMemory != 0;
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->memPitch;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::maxThreadsPerBlock() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->maxThreadsPerBlock;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+Vec3i cv::gpu::DeviceInfo::maxThreadsDim() const
+{
+#ifdef HAVE_CUDA
+    return Vec3i(deviceProps().get(device_id_)->maxThreadsDim);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec3i();
+#endif // HAVE_CUDA
+}
+
+Vec3i cv::gpu::DeviceInfo::maxGridSize() const
+{
+#ifdef HAVE_CUDA
+    return Vec3i(deviceProps().get(device_id_)->maxGridSize);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec3i();
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::clockRate() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->clockRate;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+size_t cv::gpu::DeviceInfo::totalConstMem() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->totalConstMem;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::major() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->major;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::minor() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->minor;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
 }

 size_t cv::gpu::DeviceInfo::textureAlignment() const
 {
-    return deviceProps.get(device_id_)->textureAlignment;
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->textureAlignment;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
 }

-void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
+size_t cv::gpu::DeviceInfo::texturePitchAlignment() const
 {
-    int prevDeviceID = getDevice();
-    if (prevDeviceID != device_id_)
-        setDevice(device_id_);
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->texturePitchAlignment;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}

-    cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
+int cv::gpu::DeviceInfo::multiProcessorCount() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->multiProcessorCount;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}

-    if (prevDeviceID != device_id_)
-        setDevice(prevDeviceID);
+bool cv::gpu::DeviceInfo::kernelExecTimeoutEnabled() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
 }

-size_t cv::gpu::DeviceInfo::freeMemory() const
+bool cv::gpu::DeviceInfo::integrated() const
 {
-    size_t _totalMemory, _freeMemory;
-    queryMemory(_totalMemory, _freeMemory);
-    return _freeMemory;
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->integrated != 0;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
 }

-size_t cv::gpu::DeviceInfo::totalMemory() const
+bool cv::gpu::DeviceInfo::canMapHostMemory() const
 {
-    size_t _totalMemory, _freeMemory;
-    queryMemory(_totalMemory, _freeMemory);
-    return _totalMemory;
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->canMapHostMemory != 0;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
 }

-bool cv::gpu::DeviceInfo::supports(FeatureSet feature_set) const
+DeviceInfo::ComputeMode cv::gpu::DeviceInfo::computeMode() const
 {
-    int version = majorVersion() * 10 + minorVersion();
-    return version >= feature_set;
+#ifndef HAVE_CUDA
+    throw_no_cuda();
+    return ComputeModeDefault;
+#else
+    // translation table: the compute mode value stored in cudaDeviceProp is used as the index
+    static const ComputeMode tbl[] =
+    {
+        ComputeModeDefault,
+        ComputeModeExclusive,
+        ComputeModeProhibited,
+        ComputeModeExclusiveProcess
+    };
+    static const int tbl_size = static_cast<int>(sizeof(tbl) / sizeof(tbl[0]));
+
+    const int mode = deviceProps().get(device_id_)->computeMode;
+
+    // fail loudly on a value the table does not cover instead of reading past its end
+    CV_Assert( mode >= 0 && mode < tbl_size );
+
+    return tbl[mode];
+#endif
+}
+
+int cv::gpu::DeviceInfo::maxTexture1D() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->maxTexture1D;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::maxTexture1DMipmap() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->maxTexture1DMipmap;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::maxTexture1DLinear() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->maxTexture1DLinear;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+Vec2i cv::gpu::DeviceInfo::maxTexture2D() const
+{
+#ifdef HAVE_CUDA
+    return Vec2i(deviceProps().get(device_id_)->maxTexture2D);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec2i();
+#endif // HAVE_CUDA
+}
+
+Vec2i cv::gpu::DeviceInfo::maxTexture2DMipmap() const
+{
+#ifdef HAVE_CUDA
+    return Vec2i(deviceProps().get(device_id_)->maxTexture2DMipmap);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec2i();
+#endif // HAVE_CUDA
+}
+
+Vec3i cv::gpu::DeviceInfo::maxTexture2DLinear() const
+{
+#ifdef HAVE_CUDA
+    return Vec3i(deviceProps().get(device_id_)->maxTexture2DLinear);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec3i();
+#endif // HAVE_CUDA
+}
+
+Vec2i cv::gpu::DeviceInfo::maxTexture2DGather() const
+{
+#ifdef HAVE_CUDA
+    return Vec2i(deviceProps().get(device_id_)->maxTexture2DGather);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec2i();
+#endif // HAVE_CUDA
+}
+
+Vec3i cv::gpu::DeviceInfo::maxTexture3D() const
+{
+#ifdef HAVE_CUDA
+    return Vec3i(deviceProps().get(device_id_)->maxTexture3D);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec3i();
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::maxTextureCubemap() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->maxTextureCubemap;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+Vec2i cv::gpu::DeviceInfo::maxTexture1DLayered() const
+{
+#ifdef HAVE_CUDA
+    return Vec2i(deviceProps().get(device_id_)->maxTexture1DLayered);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec2i();
+#endif // HAVE_CUDA
+}
+
+Vec3i cv::gpu::DeviceInfo::maxTexture2DLayered() const
+{
+#ifdef HAVE_CUDA
+    return Vec3i(deviceProps().get(device_id_)->maxTexture2DLayered);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec3i();
+#endif // HAVE_CUDA
+}
+
+Vec2i cv::gpu::DeviceInfo::maxTextureCubemapLayered() const
+{
+#ifdef HAVE_CUDA
+    return Vec2i(deviceProps().get(device_id_)->maxTextureCubemapLayered);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec2i();
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::maxSurface1D() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->maxSurface1D;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+Vec2i cv::gpu::DeviceInfo::maxSurface2D() const
+{
+#ifdef HAVE_CUDA
+    return Vec2i(deviceProps().get(device_id_)->maxSurface2D);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec2i();
+#endif // HAVE_CUDA
+}
+
+Vec3i cv::gpu::DeviceInfo::maxSurface3D() const
+{
+#ifdef HAVE_CUDA
+    return Vec3i(deviceProps().get(device_id_)->maxSurface3D);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec3i();
+#endif // HAVE_CUDA
+}
+
+Vec2i cv::gpu::DeviceInfo::maxSurface1DLayered() const
+{
+#ifdef HAVE_CUDA
+    return Vec2i(deviceProps().get(device_id_)->maxSurface1DLayered);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec2i();
+#endif // HAVE_CUDA
+}
+
+Vec3i cv::gpu::DeviceInfo::maxSurface2DLayered() const
+{
+#ifdef HAVE_CUDA
+    return Vec3i(deviceProps().get(device_id_)->maxSurface2DLayered);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec3i();
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::maxSurfaceCubemap() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->maxSurfaceCubemap;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+Vec2i cv::gpu::DeviceInfo::maxSurfaceCubemapLayered() const
+{
+#ifdef HAVE_CUDA
+    return Vec2i(deviceProps().get(device_id_)->maxSurfaceCubemapLayered);
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return Vec2i();
+#endif // HAVE_CUDA
+}
+
+size_t cv::gpu::DeviceInfo::surfaceAlignment() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->surfaceAlignment;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+bool cv::gpu::DeviceInfo::concurrentKernels() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->concurrentKernels != 0;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
+}
+
+bool cv::gpu::DeviceInfo::ECCEnabled() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->ECCEnabled != 0;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::pciBusID() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->pciBusID;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::pciDeviceID() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->pciDeviceID;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::pciDomainID() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->pciDomainID;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+bool cv::gpu::DeviceInfo::tccDriver() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->tccDriver != 0;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::asyncEngineCount() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->asyncEngineCount;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+bool cv::gpu::DeviceInfo::unifiedAddressing() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->unifiedAddressing != 0;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::memoryClockRate() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->memoryClockRate;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::memoryBusWidth() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->memoryBusWidth;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::l2CacheSize() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->l2CacheSize;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+int cv::gpu::DeviceInfo::maxThreadsPerMultiProcessor() const
+{
+#ifdef HAVE_CUDA
+    return deviceProps().get(device_id_)->maxThreadsPerMultiProcessor;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return 0;
+#endif // HAVE_CUDA
+}
+
+// Writes the total and free memory of device device_id_, as reported by cudaMemGetInfo,
+// into _totalMemory and _freeMemory. The device selected by the caller is restored
+// afterwards, including when the query itself fails.
+void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
+{
+#ifndef HAVE_CUDA
+    (void) _totalMemory;
+    (void) _freeMemory;
+    throw_no_cuda();
+#else
+    // the query below is issued for the current device, so temporarily make
+    // device_id_ current when another device is selected
+    const int prevDeviceID = getDevice();
+    const bool switched = (prevDeviceID != device_id_);
+
+    if (switched)
+        setDevice(device_id_);
+
+    const cudaError_t queryError = cudaMemGetInfo(&_freeMemory, &_totalMemory);
+
+    // switch back before any error is raised, so a failed query does not
+    // leave the calling thread bound to device_id_
+    const cudaError_t restoreError = switched ? cudaSetDevice(prevDeviceID) : cudaSuccess;
+
+    // the query error is reported first; a failed restore is reported otherwise
+    cudaSafeCall( queryError );
+    cudaSafeCall( restoreError );
+#endif
 }

 bool cv::gpu::DeviceInfo::isCompatible() const
 {
+#ifdef HAVE_CUDA
    // Check PTX compatibility
-    if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
+    if (TargetArchs::hasEqualOrLessPtx(major(), minor()))
        return true;

    // Check BIN compatibility
-    for (int i = minorVersion(); i >= 0; --i)
-        if (TargetArchs::hasBin(majorVersion(), i))
+    for (int i = minor(); i >= 0; --i)
+        if (TargetArchs::hasBin(major(), i))
            return true;

    return false;
+#else // no CUDA support compiled in
+    throw_no_cuda();
+    return false;
+#endif // HAVE_CUDA
 }

-void cv::gpu::DeviceInfo::query()
-{
-    const cudaDeviceProp* prop = deviceProps.get(device_id_);
+////////////////////////////////////////////////////////////////////////
+// print info

-    name_ = prop->name;
-    multi_processor_count_ = prop->multiProcessorCount;
-    majorVersion_ = prop->major;
-    minorVersion_ = prop->minor;
-}
+#ifdef HAVE_CUDA

 namespace
 {
@ -407,8 +939,14 @@ namespace
    }
 }

+#endif
+
 void cv::gpu::printCudaDeviceInfo(int device)
 {
+#ifndef HAVE_CUDA
+    (void) device;
+    throw_no_cuda();
+#else
    int count = getCudaEnabledDeviceCount();
    bool valid = (device >= 0) && (device < count);

@ -484,11 +1022,17 @@ void cv::gpu::printCudaDeviceInfo(int device)
    printf(", CUDA Driver Version  = %d.%d", driverVersion / 1000, driverVersion % 100);
    printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
    printf(", NumDevs = %d\n\n", count);
+
    fflush(stdout);
+#endif
 }

 void cv::gpu::printShortCudaDeviceInfo(int device)
 {
+#ifndef HAVE_CUDA
+    (void) device;
+    throw_no_cuda();
+#else
    int count = getCudaEnabledDeviceCount();
    bool valid = (device >= 0) && (device < count);

@ -514,11 +1058,11 @@ void cv::gpu::printShortCudaDeviceInfo(int device)

        printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
    }
+
    fflush(stdout);
+#endif
 }

-#endif // HAVE_CUDA
-
 ////////////////////////////////////////////////////////////////////////
 // Error handling

--- a/modules/gpufilters/src/filtering.cpp
+++ b/modules/gpufilters/src/filtering.cpp
@ -878,7 +878,7 @@ namespace
        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
            DeviceInfo devInfo;
-            int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
+            int cc = devInfo.major() * 10 + devInfo.minor();
            func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s));
        }

@ -977,7 +977,7 @@ namespace
        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
            DeviceInfo devInfo;
-            int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
+            int cc = devInfo.major() * 10 + devInfo.minor();
            if (ksize > 16 && cc < 20)
                CV_Error(cv::Error::StsNotImplemented, "column linear filter doesn't implemented for kernel size > 16 for device with compute capabilities less than 2.0");

--- a/modules/gpuoptflow/test/test_optflow.cpp
+++ b/modules/gpuoptflow/test/test_optflow.cpp
@ -80,7 +80,7 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
    brox(loadMat(frame0), loadMat(frame1), u, v);

    std::string fname(cvtest::TS::ptr()->get_data_path());
-    if (devInfo.majorVersion() >= 2)
+    if (devInfo.major() >= 2)
        fname += "opticalflow/brox_optical_flow_cc20.bin";
    else
        fname += "opticalflow/brox_optical_flow.bin";
--- a/modules/gpustereo/src/stereobm.cpp
+++ b/modules/gpustereo/src/stereobm.cpp
@ -91,7 +91,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()

    DeviceInfo device_info;

-    if (device_info.majorVersion() > 1 || device_info.multiProcessorCount() > 16)
+    if (device_info.major() > 1 || device_info.multiProcessorCount() > 16)
        return true;

    return false;
--- a/modules/ts/src/gpu_perf.cpp
+++ b/modules/ts/src/gpu_perf.cpp
@ -287,8 +287,8 @@ namespace perf
            cv::gpu::DeviceInfo info(i);

            printf("[----------]\n"), fflush(stdout);
-            printf("[ DEVICE   ] \t# %d %s.\n", i, info.name().c_str()), fflush(stdout);
-            printf("[          ] \tCompute capability: %d.%d\n", (int)info.majorVersion(), (int)info.minorVersion()), fflush(stdout);
+            printf("[ DEVICE   ] \t# %d %s.\n", i, info.name()), fflush(stdout);
+            printf("[          ] \tCompute capability: %d.%d\n", (int)info.major(), (int)info.minor()), fflush(stdout);
            printf("[          ] \tMulti Processor Count:  %d\n", info.multiProcessorCount()), fflush(stdout);
            printf("[          ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)), fflush(stdout);
            printf("[          ] \tFree  memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory()  / 1024.0) / 1024.0)), fflush(stdout);
--- a/modules/ts/src/ts_perf.cpp
+++ b/modules/ts/src/ts_perf.cpp
@ -682,13 +682,13 @@ void TestBase::Init(int argc, const char* const argv[])
        cv::gpu::DeviceInfo info(param_cuda_device);
        if (!info.isCompatible())
        {
-            printf("[----------]\n[ FAILURE  ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name().c_str()), fflush(stdout);
+            printf("[----------]\n[ FAILURE  ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name()), fflush(stdout);
            exit(-1);
        }

        cv::gpu::setDevice(param_cuda_device);

-        printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name().c_str()), fflush(stdout);
+        printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name()), fflush(stdout);
    }
 #endif

--- a/samples/gpu/driver_api_multi.cpp
+++ b/samples/gpu/driver_api_multi.cpp
@ -82,8 +82,8 @@ int main()
        if (!dev_info.isCompatible())
        {
            std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.majorVersion()
-                 << dev_info.minorVersion() << "\n";
+                 << dev_info.name() << ", CC " << dev_info.major()
+                 << dev_info.minor() << "\n";
            return -1;
        }
    }
--- a/samples/gpu/driver_api_stereo_multi.cpp
+++ b/samples/gpu/driver_api_stereo_multi.cpp
@ -112,8 +112,8 @@ int main(int argc, char** argv)
        if (!dev_info.isCompatible())
        {
            std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.majorVersion()
-                 << dev_info.minorVersion() << "\n";
+                 << dev_info.name() << ", CC " << dev_info.major()
+                 << dev_info.minor() << "\n";
            return -1;
        }
    }
--- a/samples/gpu/multi.cpp
+++ b/samples/gpu/multi.cpp
@ -62,8 +62,8 @@ int main()
        if (!dev_info.isCompatible())
        {
            std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.majorVersion()
-                 << dev_info.minorVersion() << "\n";
+                 << dev_info.name() << ", CC " << dev_info.major()
+                 << dev_info.minor() << "\n";
            return -1;
        }
    }
--- a/samples/gpu/performance/performance.cpp
+++ b/samples/gpu/performance/performance.cpp
@ -191,7 +191,7 @@ int main(int argc, const char* argv[])
    DeviceInfo dev_info(device);
    if (!dev_info.isCompatible())
    {
-        cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
+        cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.major() << '.' << dev_info.minor() << endl;
        return -1;
    }
    setDevice(device);
--- a/samples/gpu/stereo_multi.cpp
+++ b/samples/gpu/stereo_multi.cpp
@ -81,8 +81,8 @@ int main(int argc, char** argv)
        if (!dev_info.isCompatible())
        {
            std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.majorVersion()
-                 << dev_info.minorVersion() << "\n";
+                 << dev_info.name() << ", CC " << dev_info.major()
+                 << dev_info.minor() << "\n";
            return -1;
        }
    }