refactored gpu info structures (TargetArchs and DeviceInfo)

now DeviceInfo provides full information about device (from cudaDeviceProp)
pull/974/head
Vladislav Vinogradov 12 years ago
parent 76f4b02b06
commit 2dab93c2e8
  1. 215
      modules/core/include/opencv2/core/gpu.hpp
  2. 56
      modules/core/include/opencv2/core/gpu.inl.hpp
  3. 810
      modules/core/src/gpu_info.cpp
  4. 4
      modules/gpufilters/src/filtering.cpp
  5. 2
      modules/gpuoptflow/test/test_optflow.cpp
  6. 2
      modules/gpustereo/src/stereobm.cpp
  7. 4
      modules/ts/src/gpu_perf.cpp
  8. 4
      modules/ts/src/ts_perf.cpp
  9. 4
      samples/gpu/driver_api_multi.cpp
  10. 4
      samples/gpu/driver_api_stereo_multi.cpp
  11. 4
      samples/gpu/multi.cpp
  12. 2
      samples/gpu/performance/performance.cpp
  13. 4
      samples/gpu/stereo_multi.cpp

@ -392,17 +392,17 @@ private:
//////////////////////////////// Initialization & Info ////////////////////////
//! this is the only function that does not throw exceptions if the library is compiled without CUDA
CV_EXPORTS int getCudaEnabledDeviceCount();
//! Functions below throw cv::Exception if the library is compiled without CUDA.
//! set device to be used for GPU executions for the calling host thread
CV_EXPORTS void setDevice(int device);
//! returns which device is currently being used for the calling host thread
CV_EXPORTS int getDevice();
//! explicitly destroys and cleans up all resources associated with the current device in the current process
//! any subsequent API call to this device will reinitialize the device
CV_EXPORTS void resetDevice();
enum FeatureSet
@ -423,75 +423,218 @@ enum FeatureSet
DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};
//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
//! information about what GPU archs this OpenCV GPU module was compiled for
class CV_EXPORTS TargetArchs
{
public:
    //! checks whether the module was built with the given feature set
    static bool builtWith(FeatureSet feature_set);

    //! checks whether code for the given compute capability is present (as PTX or as cubin)
    static bool has(int major, int minor);
    static bool hasPtx(int major, int minor);
    static bool hasBin(int major, int minor);

    //! checks for PTX compiled for a compute capability <= (major, minor)
    static bool hasEqualOrLessPtx(int major, int minor);
    //! checks for code (PTX or cubin) compiled for a compute capability >= (major, minor)
    static bool hasEqualOrGreater(int major, int minor);
    static bool hasEqualOrGreaterPtx(int major, int minor);
    static bool hasEqualOrGreaterBin(int major, int minor);

private:
    TargetArchs();  // static-only class: never instantiated
};
//! information about the given GPU
class CV_EXPORTS DeviceInfo
{
public:
// Creates DeviceInfo object for the current GPU
DeviceInfo() : device_id_(getDevice()) { query(); }
//! creates DeviceInfo object for the current GPU
DeviceInfo();
// Creates DeviceInfo object for the given GPU
DeviceInfo(int device_id) : device_id_(device_id) { query(); }
//! creates DeviceInfo object for the given GPU
DeviceInfo(int device_id);
String name() const { return name_; }
//! device number.
int deviceID() const;
// Return compute capability versions
int majorVersion() const { return majorVersion_; }
int minorVersion() const { return minorVersion_; }
//! ASCII string identifying device
const char* name() const;
int multiProcessorCount() const { return multi_processor_count_; }
//! global memory available on device in bytes
size_t totalGlobalMem() const;
//! shared memory available per block in bytes
size_t sharedMemPerBlock() const;
//! 32-bit registers available per block
int regsPerBlock() const;
//! warp size in threads
int warpSize() const;
//! maximum pitch in bytes allowed by memory copies
size_t memPitch() const;
//! maximum number of threads per block
int maxThreadsPerBlock() const;
//! maximum size of each dimension of a block
Vec3i maxThreadsDim() const;
//! maximum size of each dimension of a grid
Vec3i maxGridSize() const;
//! clock frequency in kilohertz
int clockRate() const;
//! constant memory available on device in bytes
size_t totalConstMem() const;
//! major compute capability
int major() const;
//! minor compute capability
int minor() const;
//! alignment requirement for textures
size_t textureAlignment() const;
//! pitch alignment requirement for texture references bound to pitched memory
size_t texturePitchAlignment() const;
//! number of multiprocessors on device
int multiProcessorCount() const;
//! specified whether there is a run time limit on kernels
bool kernelExecTimeoutEnabled() const;
//! device is integrated as opposed to discrete
bool integrated() const;
//! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
bool canMapHostMemory() const;
enum ComputeMode
{
ComputeModeDefault, /**< default compute mode (Multiple threads can use ::cudaSetDevice() with this device) */
ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use ::cudaSetDevice() with this device) */
ComputeModeProhibited, /**< compute-prohibited mode (No threads can use ::cudaSetDevice() with this device) */
ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use ::cudaSetDevice() with this device) */
};
//! compute mode
ComputeMode computeMode() const;
//! maximum 1D texture size
int maxTexture1D() const;
//! maximum 1D mipmapped texture size
int maxTexture1DMipmap() const;
//! maximum size for 1D textures bound to linear memory
int maxTexture1DLinear() const;
//! maximum 2D texture dimensions
Vec2i maxTexture2D() const;
//! maximum 2D mipmapped texture dimensions
Vec2i maxTexture2DMipmap() const;
//! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
Vec3i maxTexture2DLinear() const;
//! maximum 2D texture dimensions if texture gather operations have to be performed
Vec2i maxTexture2DGather() const;
//! maximum 3D texture dimensions
Vec3i maxTexture3D() const;
//! maximum Cubemap texture dimensions
int maxTextureCubemap() const;
//! maximum 1D layered texture dimensions
Vec2i maxTexture1DLayered() const;
//! maximum 2D layered texture dimensions
Vec3i maxTexture2DLayered() const;
//! maximum Cubemap layered texture dimensions
Vec2i maxTextureCubemapLayered() const;
//! maximum 1D surface size
int maxSurface1D() const;
//! maximum 2D surface dimensions
Vec2i maxSurface2D() const;
//! maximum 3D surface dimensions
Vec3i maxSurface3D() const;
//! maximum 1D layered surface dimensions
Vec2i maxSurface1DLayered() const;
//! maximum 2D layered surface dimensions
Vec3i maxSurface2DLayered() const;
//! maximum Cubemap surface dimensions
int maxSurfaceCubemap() const;
//! maximum Cubemap layered surface dimensions
Vec2i maxSurfaceCubemapLayered() const;
//! alignment requirements for surfaces
size_t surfaceAlignment() const;
//! device can possibly execute multiple kernels concurrently
bool concurrentKernels() const;
//! device has ECC support enabled
bool ECCEnabled() const;
//! PCI bus ID of the device
int pciBusID() const;
//! PCI device ID of the device
int pciDeviceID() const;
//! PCI domain ID of the device
int pciDomainID() const;
//! true if device is a Tesla device using TCC driver, false otherwise
bool tccDriver() const;
//! number of asynchronous engines
int asyncEngineCount() const;
//! device shares a unified address space with the host
bool unifiedAddressing() const;
//! peak memory clock frequency in kilohertz
int memoryClockRate() const;
//! global memory bus width in bits
int memoryBusWidth() const;
//! size of L2 cache in bytes
int l2CacheSize() const;
//! maximum resident threads per multiprocessor
int maxThreadsPerMultiProcessor() const;
//! gets free and total device memory
void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
size_t freeMemory() const;
size_t totalMemory() const;
// Checks whether device supports the given feature
//! checks whether device supports the given feature
bool supports(FeatureSet feature_set) const;
// Checks whether the GPU module can be run on the given device
//! checks whether the GPU module can be run on the given device
bool isCompatible() const;
bool canMapHostMemory() const;
size_t textureAlignment() const;
int deviceID() const { return device_id_; }
private:
void query();
int device_id_;
String name_;
int multi_processor_count_;
int majorVersion_;
int minorVersion_;
};
//! prints detailed information about the given device to stdout
CV_EXPORTS void printCudaDeviceInfo(int device);
//! prints a short summary about the given device to stdout
CV_EXPORTS void printShortCudaDeviceInfo(int device);
}} // namespace cv { namespace gpu {

@ -567,6 +567,62 @@ Stream::Stream(const Ptr<Impl>& impl)
{
}
//////////////////////////////// Initialization & Info ////////////////////////
// true when code for compute capability (major, minor) is present,
// either as PTX or as a device binary
inline
bool TargetArchs::has(int major, int minor)
{
    if (hasPtx(major, minor))
        return true;

    return hasBin(major, minor);
}

// same check, but for any compute capability >= (major, minor)
inline
bool TargetArchs::hasEqualOrGreater(int major, int minor)
{
    if (hasEqualOrGreaterPtx(major, minor))
        return true;

    return hasEqualOrGreaterBin(major, minor);
}
// creates DeviceInfo for the device currently bound to the calling thread
inline
DeviceInfo::DeviceInfo()
{
    device_id_ = getDevice();
}

// creates DeviceInfo for an explicit device id, validated against the device count
inline
DeviceInfo::DeviceInfo(int device_id)
{
    CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
    device_id_ = device_id;
}
// ordinal of the device this object describes
inline
int DeviceInfo::deviceID() const
{
    return device_id_;
}
// currently free device memory in bytes (queried live, not cached)
inline
size_t DeviceInfo::freeMemory() const
{
    size_t totalMem = 0, freeMem = 0;
    queryMemory(totalMem, freeMem);
    return freeMem;
}

// total device memory in bytes (queried live, not cached)
inline
size_t DeviceInfo::totalMemory() const
{
    size_t totalMem = 0, freeMem = 0;
    queryMemory(totalMem, freeMem);
    return totalMem;
}
// the device supports a feature when its compute capability, encoded as
// major*10 + minor, is at least the FeatureSet's numeric value
inline
bool DeviceInfo::supports(FeatureSet feature_set) const
{
    return (major() * 10 + minor()) >= feature_set;
}
}} // namespace cv { namespace gpu {
//////////////////////////////// Mat ////////////////////////////////

@ -41,50 +41,17 @@
//M*/
#include "precomp.hpp"
#include <limits>
using namespace cv;
using namespace cv::gpu;
//////////////////////////////// Initialization & Info ////////////////////////

// NOTE(review): this whole-file stub section duplicates the per-function
// #ifndef HAVE_CUDA guards used by the definitions below, and stubs functions
// (e.g. DeviceInfo::query()) that no longer exist in the refactored class —
// looks like pre-refactor residue; confirm and remove together with the
// matching trailing #else/#endif pair.
#ifndef HAVE_CUDA

int cv::gpu::getCudaEnabledDeviceCount() { return 0; }

void cv::gpu::setDevice(int) { throw_no_cuda(); }
int cv::gpu::getDevice() { throw_no_cuda(); return 0; }

void cv::gpu::resetDevice() { throw_no_cuda(); }

bool cv::gpu::deviceSupports(FeatureSet) { throw_no_cuda(); return false; }

bool cv::gpu::TargetArchs::builtWith(FeatureSet) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::has(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasBin(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreater(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int, int) { throw_no_cuda(); return false; }
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int, int) { throw_no_cuda(); return false; }

size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { throw_no_cuda(); return 0; }
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_no_cuda(); }
size_t cv::gpu::DeviceInfo::freeMemory() const { throw_no_cuda(); return 0; }
size_t cv::gpu::DeviceInfo::totalMemory() const { throw_no_cuda(); return 0; }
bool cv::gpu::DeviceInfo::supports(FeatureSet) const { throw_no_cuda(); return false; }
bool cv::gpu::DeviceInfo::isCompatible() const { throw_no_cuda(); return false; }
void cv::gpu::DeviceInfo::query() { throw_no_cuda(); }

void cv::gpu::printCudaDeviceInfo(int) { throw_no_cuda(); }
void cv::gpu::printShortCudaDeviceInfo(int) { throw_no_cuda(); }

#else // HAVE_CUDA
int cv::gpu::getCudaEnabledDeviceCount()
{
#ifndef HAVE_CUDA
return 0;
#else
int count;
cudaError_t error = cudaGetDeviceCount( &count );
cudaError_t error = cudaGetDeviceCount(&count);
if (error == cudaErrorInsufficientDriver)
return -1;
@ -94,25 +61,78 @@ int cv::gpu::getCudaEnabledDeviceCount()
cudaSafeCall( error );
return count;
#endif
}
//! binds the given device to the calling host thread.
//! Fix: removed a stale pre-refactor call that executed unconditionally
//! before the HAVE_CUDA guard (diff-merge residue), which would crash or
//! fail to compile in a no-CUDA build.
void cv::gpu::setDevice(int device)
{
#ifndef HAVE_CUDA
    (void) device;
    throw_no_cuda();
#else
    cudaSafeCall( cudaSetDevice(device) );
#endif
}
//! returns the device currently bound to the calling host thread.
//! Fix: removed a duplicated pre-refactor call line (diff-merge residue)
//! that queried the device twice.
int cv::gpu::getDevice()
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    int device;
    cudaSafeCall( cudaGetDevice(&device) );
    return device;
#endif
}
// destroys all allocations on the current device in the current process;
// any subsequent API call to this device will reinitialize it
void cv::gpu::resetDevice()
{
#ifndef HAVE_CUDA
    throw_no_cuda();
#else
    cudaSafeCall( cudaDeviceReset() );
#endif
}
// true when the module binary was built for the feature AND the current
// device's compute capability is high enough
bool cv::gpu::deviceSupports(FeatureSet feature_set)
{
#ifndef HAVE_CUDA
    (void) feature_set;
    throw_no_cuda();
    return false;
#else
    // per-device cache of compute capability encoded as major*10 + minor;
    // -1 marks a not-yet-queried slot
    // NOTE(review): plain static array, no synchronization — assumes
    // single-threaded use; confirm against callers
    static int versions[] =
    {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };
    static const int cache_size = static_cast<int>(sizeof(versions) / sizeof(versions[0]));

    const int devId = getDevice();

    int version;

    if (devId < cache_size && versions[devId] >= 0)
    {
        version = versions[devId];
    }
    else
    {
        // cache miss (or device id beyond cache): query the device
        DeviceInfo dev(devId);
        version = dev.major() * 10 + dev.minor();
        if (devId < cache_size)
            versions[devId] = version;
    }

    return TargetArchs::builtWith(feature_set) && (version >= feature_set);
#endif
}
////////////////////////////////////////////////////////////////////////
// TargetArchs
#ifdef HAVE_CUDA
namespace
{
class CudaArch
@ -128,7 +148,7 @@ namespace
bool hasEqualOrGreaterBin(int major, int minor) const;
private:
static void fromStr(const String& set_as_str, std::vector<int>& arr);
static void fromStr(const char* set_as_str, std::vector<int>& arr);
std::vector<int> bin;
std::vector<int> ptx;
@ -174,12 +194,14 @@ namespace
return !bin.empty() && (bin.back() >= major * 10 + minor);
}
void CudaArch::fromStr(const String& set_as_str, std::vector<int>& arr)
void CudaArch::fromStr(const char* set_as_str, std::vector<int>& arr)
{
arr.clear();
const size_t len = strlen(set_as_str);
size_t pos = 0;
while (pos < set_as_str.size())
while (pos < len)
{
if (isspace(set_as_str[pos]))
{
@ -189,8 +211,8 @@ namespace
{
int cur_value;
int chars_read;
int args_read = sscanf(set_as_str.c_str() + pos, "%d%n", &cur_value, &chars_read);
CV_Assert(args_read == 1);
int args_read = sscanf(set_as_str + pos, "%d%n", &cur_value, &chars_read);
CV_Assert( args_read == 1 );
arr.push_back(cur_value);
pos += chars_read;
@ -201,70 +223,83 @@ namespace
}
}
#endif
//! checks whether the module was built with the given feature set.
//! Fix: the body of the removed out-of-line TargetArchs::has() (now inline in
//! gpu.inl.hpp) was interleaved here by the diff merge, leaving a misplaced
//! closing brace before the #endif; restored the single-function form.
bool cv::gpu::TargetArchs::builtWith(cv::gpu::FeatureSet feature_set)
{
#ifndef HAVE_CUDA
    (void) feature_set;
    throw_no_cuda();
    return false;
#else
    return cudaArch.builtWith(feature_set);
#endif
}
// true when PTX for exactly compute capability (major, minor) was built in
bool cv::gpu::TargetArchs::hasPtx(int major, int minor)
{
#ifndef HAVE_CUDA
    (void) major;
    (void) minor;
    throw_no_cuda();
    return false;
#else
    return cudaArch.hasPtx(major, minor);
#endif
}

// true when a cubin for exactly compute capability (major, minor) was built in
bool cv::gpu::TargetArchs::hasBin(int major, int minor)
{
#ifndef HAVE_CUDA
    (void) major;
    (void) minor;
    throw_no_cuda();
    return false;
#else
    return cudaArch.hasBin(major, minor);
#endif
}
//! checks for PTX compiled for a compute capability <= (major, minor).
//! Fix: the body of the removed out-of-line TargetArchs::hasEqualOrGreater()
//! (now inline in gpu.inl.hpp) was interleaved here by the diff merge,
//! leaving a misplaced closing brace before the #endif; restored the
//! single-function form.
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor)
{
#ifndef HAVE_CUDA
    (void) major;
    (void) minor;
    throw_no_cuda();
    return false;
#else
    return cudaArch.hasEqualOrLessPtx(major, minor);
#endif
}
// true when PTX for a compute capability >= (major, minor) was built in
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
{
#ifndef HAVE_CUDA
    (void) major;
    (void) minor;
    throw_no_cuda();
    return false;
#else
    return cudaArch.hasEqualOrGreaterPtx(major, minor);
#endif
}

// true when a cubin for a compute capability >= (major, minor) was built in
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
{
#ifndef HAVE_CUDA
    (void) major;
    (void) minor;
    throw_no_cuda();
    return false;
#else
    return cudaArch.hasEqualOrGreaterBin(major, minor);
#endif
}
</full_update>
////////////////////////////////////////////////////////////////////////
// DeviceInfo

// Fix: removed a stale pre-refactor copy of cv::gpu::deviceSupports() that
// was interleaved with this section header by the diff merge — it called the
// removed DeviceInfo::majorVersion()/minorVersion() accessors and duplicated
// the definition that already appears earlier in this file.
#ifdef HAVE_CUDA

namespace
{
    // Process-wide cache of cudaDeviceProp for every device, filled once at
    // construction time.
    // Fix: the diff merge interleaved the old implementation (vector of owning
    // raw pointers, lazy per-device query, destructor with manual delete, and
    // a namespace-scope global constructed before CUDA init) with the new one;
    // restored the refactored value-semantics version behind a function-local
    // singleton so construction happens on first use.
    class DeviceProps
    {
    public:
        DeviceProps();

        const cudaDeviceProp* get(int devID) const;

    private:
        std::vector<cudaDeviceProp> props_;
    };

    DeviceProps::DeviceProps()
    {
        int count = getCudaEnabledDeviceCount();

        if (count > 0)
        {
            props_.resize(count);

            for (int devID = 0; devID < count; ++devID)
            {
                // query every device eagerly; get() then never mutates state
                cudaSafeCall( cudaGetDeviceProperties(&props_[devID], devID) );
            }
        }
    }

    const cudaDeviceProp* DeviceProps::get(int devID) const
    {
        CV_Assert( static_cast<size_t>(devID) < props_.size() );

        return &props_[devID];
    }

    // Meyers singleton: constructed on first use, after the runtime is ready
    DeviceProps& deviceProps()
    {
        static DeviceProps props;
        return props;
    }
}

#endif
// ASCII string identifying the device (cudaDeviceProp::name)
const char* cv::gpu::DeviceInfo::name() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return "";
#else
    return deviceProps().get(device_id_)->name;
#endif
}

// global memory available on the device in bytes
size_t cv::gpu::DeviceInfo::totalGlobalMem() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->totalGlobalMem;
#endif
}
//! shared memory available per block in bytes.
//! Fix: removed a stale pre-refactor return statement (calling the removed
//! `deviceProps` global) that sat before the HAVE_CUDA guard and made the
//! guarded body unreachable.
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->sharedMemPerBlock;
#endif
}
// 32-bit registers available per block
int cv::gpu::DeviceInfo::regsPerBlock() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->regsPerBlock;
#endif
}
bool cv::gpu::DeviceInfo::canMapHostMemory() const
int cv::gpu::DeviceInfo::warpSize() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->warpSize;
#endif
}
// maximum pitch in bytes allowed by memory copies
size_t cv::gpu::DeviceInfo::memPitch() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->memPitch;
#endif
}

// maximum number of threads per block
int cv::gpu::DeviceInfo::maxThreadsPerBlock() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxThreadsPerBlock;
#endif
}

// maximum size of each dimension of a block
Vec3i cv::gpu::DeviceInfo::maxThreadsDim() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxThreadsDim);
#endif
}

// maximum size of each dimension of a grid
Vec3i cv::gpu::DeviceInfo::maxGridSize() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxGridSize);
#endif
}
//! clock frequency in kilohertz.
//! Fix: removed a stale pre-refactor return statement (the old
//! canMapHostMemory() body, calling the removed `deviceProps` global) that
//! sat before the HAVE_CUDA guard and made the guarded body unreachable.
int cv::gpu::DeviceInfo::clockRate() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->clockRate;
#endif
}
// constant memory available on the device in bytes
size_t cv::gpu::DeviceInfo::totalConstMem() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->totalConstMem;
#endif
}

// major compute capability
int cv::gpu::DeviceInfo::major() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->major;
#endif
}

// minor compute capability
int cv::gpu::DeviceInfo::minor() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->minor;
#endif
}
//! alignment requirement for textures.
//! Fix: removed a stale pre-refactor return statement (calling the removed
//! `deviceProps` global) that sat before the HAVE_CUDA guard and made the
//! guarded body unreachable.
size_t cv::gpu::DeviceInfo::textureAlignment() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->textureAlignment;
#endif
}
void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
size_t cv::gpu::DeviceInfo::texturePitchAlignment() const
{
int prevDeviceID = getDevice();
if (prevDeviceID != device_id_)
setDevice(device_id_);
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->texturePitchAlignment;
#endif
}
//! number of multiprocessors on the device.
//! Fix: a stray cudaMemGetInfo call from the pre-refactor queryMemory() body
//! was interleaved above this function by the diff merge; removed it.
int cv::gpu::DeviceInfo::multiProcessorCount() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->multiProcessorCount;
#endif
}
//! specifies whether there is a run time limit on kernels.
//! Fix: trailing statements of the pre-refactor queryMemory() body
//! (device restore) were interleaved above this function by the diff merge;
//! removed them.
bool cv::gpu::DeviceInfo::kernelExecTimeoutEnabled() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0;
#endif
}
// device is integrated as opposed to discrete
bool cv::gpu::DeviceInfo::integrated() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->integrated != 0;
#endif
}

// device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
bool cv::gpu::DeviceInfo::canMapHostMemory() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->canMapHostMemory != 0;
#endif
}

// translates the raw cudaDeviceProp::computeMode integer into the enum
DeviceInfo::ComputeMode cv::gpu::DeviceInfo::computeMode() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return ComputeModeDefault;
#else
    static const ComputeMode tbl[] =
    {
        ComputeModeDefault,
        ComputeModeExclusive,
        ComputeModeProhibited,
        ComputeModeExclusiveProcess
    };

    return tbl[deviceProps().get(device_id_)->computeMode];
#endif
}

// maximum 1D texture size
int cv::gpu::DeviceInfo::maxTexture1D() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxTexture1D;
#endif
}
size_t cv::gpu::DeviceInfo::freeMemory() const
int cv::gpu::DeviceInfo::maxTexture1DMipmap() const
{
size_t _totalMemory, _freeMemory;
queryMemory(_totalMemory, _freeMemory);
return _freeMemory;
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->maxTexture1DMipmap;
#endif
}
size_t cv::gpu::DeviceInfo::totalMemory() const
int cv::gpu::DeviceInfo::maxTexture1DLinear() const
{
size_t _totalMemory, _freeMemory;
queryMemory(_totalMemory, _freeMemory);
return _totalMemory;
#ifndef HAVE_CUDA
throw_no_cuda();
return 0;
#else
return deviceProps().get(device_id_)->maxTexture1DLinear;
#endif
}
// maximum 2D texture dimensions
Vec2i cv::gpu::DeviceInfo::maxTexture2D() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxTexture2D);
#endif
}
bool cv::gpu::DeviceInfo::supports(FeatureSet feature_set) const
Vec2i cv::gpu::DeviceInfo::maxTexture2DMipmap() const
{
int version = majorVersion() * 10 + minorVersion();
return version >= feature_set;
#ifndef HAVE_CUDA
throw_no_cuda();
return Vec2i();
#else
return Vec2i(deviceProps().get(device_id_)->maxTexture2DMipmap);
#endif
}
// The accessors below all follow one pattern: throw cv::Exception in a
// no-CUDA build, otherwise forward the corresponding cudaDeviceProp field
// from the process-wide cache. queryMemory() is the exception — it queries
// live memory figures, temporarily switching the bound device if needed.

Vec3i cv::gpu::DeviceInfo::maxTexture2DLinear() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxTexture2DLinear);
#endif
}

Vec2i cv::gpu::DeviceInfo::maxTexture2DGather() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxTexture2DGather);
#endif
}

Vec3i cv::gpu::DeviceInfo::maxTexture3D() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxTexture3D);
#endif
}

int cv::gpu::DeviceInfo::maxTextureCubemap() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxTextureCubemap;
#endif
}

Vec2i cv::gpu::DeviceInfo::maxTexture1DLayered() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxTexture1DLayered);
#endif
}

Vec3i cv::gpu::DeviceInfo::maxTexture2DLayered() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxTexture2DLayered);
#endif
}

Vec2i cv::gpu::DeviceInfo::maxTextureCubemapLayered() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxTextureCubemapLayered);
#endif
}

int cv::gpu::DeviceInfo::maxSurface1D() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxSurface1D;
#endif
}

Vec2i cv::gpu::DeviceInfo::maxSurface2D() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxSurface2D);
#endif
}

Vec3i cv::gpu::DeviceInfo::maxSurface3D() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxSurface3D);
#endif
}

Vec2i cv::gpu::DeviceInfo::maxSurface1DLayered() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxSurface1DLayered);
#endif
}

Vec3i cv::gpu::DeviceInfo::maxSurface2DLayered() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxSurface2DLayered);
#endif
}

int cv::gpu::DeviceInfo::maxSurfaceCubemap() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxSurfaceCubemap;
#endif
}

Vec2i cv::gpu::DeviceInfo::maxSurfaceCubemapLayered() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxSurfaceCubemapLayered);
#endif
}

size_t cv::gpu::DeviceInfo::surfaceAlignment() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->surfaceAlignment;
#endif
}

bool cv::gpu::DeviceInfo::concurrentKernels() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->concurrentKernels != 0;
#endif
}

bool cv::gpu::DeviceInfo::ECCEnabled() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->ECCEnabled != 0;
#endif
}

int cv::gpu::DeviceInfo::pciBusID() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->pciBusID;
#endif
}

int cv::gpu::DeviceInfo::pciDeviceID() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->pciDeviceID;
#endif
}

int cv::gpu::DeviceInfo::pciDomainID() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->pciDomainID;
#endif
}

bool cv::gpu::DeviceInfo::tccDriver() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->tccDriver != 0;
#endif
}

int cv::gpu::DeviceInfo::asyncEngineCount() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->asyncEngineCount;
#endif
}

bool cv::gpu::DeviceInfo::unifiedAddressing() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->unifiedAddressing != 0;
#endif
}

int cv::gpu::DeviceInfo::memoryClockRate() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->memoryClockRate;
#endif
}

int cv::gpu::DeviceInfo::memoryBusWidth() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->memoryBusWidth;
#endif
}

int cv::gpu::DeviceInfo::l2CacheSize() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->l2CacheSize;
#endif
}

int cv::gpu::DeviceInfo::maxThreadsPerMultiProcessor() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxThreadsPerMultiProcessor;
#endif
}

// live free/total memory query; temporarily binds this object's device
// when it is not the thread's current device, then restores the old binding
void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
{
#ifndef HAVE_CUDA
    (void) _totalMemory;
    (void) _freeMemory;
    throw_no_cuda();
#else
    int prevDeviceID = getDevice();
    if (prevDeviceID != device_id_)
        setDevice(device_id_);

    cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );

    if (prevDeviceID != device_id_)
        setDevice(prevDeviceID);
#endif
}
//! checks whether the GPU module can be run on the given device.
//! Fix: the diff merge left the pre-refactor condition lines (calling the
//! removed majorVersion()/minorVersion() accessors) interleaved with the new
//! major()/minor() versions; kept only the refactored statements.
bool cv::gpu::DeviceInfo::isCompatible() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    // Check PTX compatibility
    if (TargetArchs::hasEqualOrLessPtx(major(), minor()))
        return true;

    // Check BIN compatibility
    for (int i = minor(); i >= 0; --i)
        if (TargetArchs::hasBin(major(), i))
            return true;

    return false;
#endif
}
////////////////////////////////////////////////////////////////////////
// print info

// Fix: removed the stale pre-refactor DeviceInfo::query() that was
// interleaved with this section header by the diff merge — the refactored
// class no longer declares query() or the cached members (name_,
// multi_processor_count_, majorVersion_, minorVersion_) it assigned.
#ifdef HAVE_CUDA
namespace
{
@ -407,8 +939,14 @@ namespace
}
}
#endif
void cv::gpu::printCudaDeviceInfo(int device)
{
#ifndef HAVE_CUDA
(void) device;
throw_no_cuda();
#else
int count = getCudaEnabledDeviceCount();
bool valid = (device >= 0) && (device < count);
@ -484,11 +1022,17 @@ void cv::gpu::printCudaDeviceInfo(int device)
printf(", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100);
printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
printf(", NumDevs = %d\n\n", count);
fflush(stdout);
#endif
}
void cv::gpu::printShortCudaDeviceInfo(int device)
{
#ifndef HAVE_CUDA
(void) device;
throw_no_cuda();
#else
int count = getCudaEnabledDeviceCount();
bool valid = (device >= 0) && (device < count);
@ -514,11 +1058,11 @@ void cv::gpu::printShortCudaDeviceInfo(int device)
printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
}
fflush(stdout);
#endif
}
#endif // HAVE_CUDA
////////////////////////////////////////////////////////////////////////
// Error handling

@ -878,7 +878,7 @@ namespace
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
{
DeviceInfo devInfo;
int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
int cc = devInfo.major() * 10 + devInfo.minor();
func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s));
}
@ -977,7 +977,7 @@ namespace
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
{
DeviceInfo devInfo;
int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
int cc = devInfo.major() * 10 + devInfo.minor();
if (ksize > 16 && cc < 20)
CV_Error(cv::Error::StsNotImplemented, "column linear filter doesn't implemented for kernel size > 16 for device with compute capabilities less than 2.0");

@ -80,7 +80,7 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
brox(loadMat(frame0), loadMat(frame1), u, v);
std::string fname(cvtest::TS::ptr()->get_data_path());
if (devInfo.majorVersion() >= 2)
if (devInfo.major() >= 2)
fname += "opticalflow/brox_optical_flow_cc20.bin";
else
fname += "opticalflow/brox_optical_flow.bin";

@ -91,7 +91,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
DeviceInfo device_info;
if (device_info.majorVersion() > 1 || device_info.multiProcessorCount() > 16)
if (device_info.major() > 1 || device_info.multiProcessorCount() > 16)
return true;
return false;

@ -287,8 +287,8 @@ namespace perf
cv::gpu::DeviceInfo info(i);
printf("[----------]\n"), fflush(stdout);
printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()), fflush(stdout);
printf("[ ] \tCompute capability: %d.%d\n", (int)info.majorVersion(), (int)info.minorVersion()), fflush(stdout);
printf("[ DEVICE ] \t# %d %s.\n", i, info.name()), fflush(stdout);
printf("[ ] \tCompute capability: %d.%d\n", (int)info.major(), (int)info.minor()), fflush(stdout);
printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()), fflush(stdout);
printf("[ ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)), fflush(stdout);
printf("[ ] \tFree memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0)), fflush(stdout);

@ -682,13 +682,13 @@ void TestBase::Init(int argc, const char* const argv[])
cv::gpu::DeviceInfo info(param_cuda_device);
if (!info.isCompatible())
{
printf("[----------]\n[ FAILURE ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name().c_str()), fflush(stdout);
printf("[----------]\n[ FAILURE ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name()), fflush(stdout);
exit(-1);
}
cv::gpu::setDevice(param_cuda_device);
printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name().c_str()), fflush(stdout);
printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name()), fflush(stdout);
}
#endif

@ -82,8 +82,8 @@ int main()
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}

@ -112,8 +112,8 @@ int main(int argc, char** argv)
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}

@ -62,8 +62,8 @@ int main()
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}

@ -191,7 +191,7 @@ int main(int argc, const char* argv[])
DeviceInfo dev_info(device);
if (!dev_info.isCompatible())
{
cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.major() << '.' << dev_info.minor() << endl;
return -1;
}
setDevice(device);

@ -81,8 +81,8 @@ int main(int argc, char** argv)
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}

Loading…
Cancel
Save