|
|
|
@ -49,18 +49,18 @@ using namespace cv::gpu; |
|
|
|
|
namespace
|
|
|
|
|
{ |
|
|
|
|
template <typename Comparer> |
|
|
|
|
bool compare(const std::string& str, int x, Comparer cmp) |
|
|
|
|
bool compareToSet(const std::string& set_as_str, int value, Comparer cmp) |
|
|
|
|
{ |
|
|
|
|
if (str.find_first_not_of(" ") == string::npos) |
|
|
|
|
if (set_as_str.find_first_not_of(" ") == string::npos) |
|
|
|
|
return false; |
|
|
|
|
|
|
|
|
|
std::stringstream stream(str); |
|
|
|
|
int val; |
|
|
|
|
std::stringstream stream(set_as_str); |
|
|
|
|
int cur_value; |
|
|
|
|
|
|
|
|
|
while (!stream.eof()) |
|
|
|
|
{ |
|
|
|
|
stream >> val; |
|
|
|
|
if (cmp(val, x)) |
|
|
|
|
stream >> cur_value; |
|
|
|
|
if (cmp(cur_value, value)) |
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -87,19 +87,19 @@ CV_EXPORTS bool cv::gpu::TargetArchs::has(int major, int minor) |
|
|
|
|
|
|
|
|
|
CV_EXPORTS bool cv::gpu::TargetArchs::hasPtx(int major, int minor) |
|
|
|
|
{ |
|
|
|
|
return ::compare(CUDA_ARCH_PTX, major * 10 + minor, std::equal_to<int>()); |
|
|
|
|
return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor, std::equal_to<int>()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CV_EXPORTS bool cv::gpu::TargetArchs::hasBin(int major, int minor) |
|
|
|
|
{ |
|
|
|
|
return ::compare(CUDA_ARCH_BIN, major * 10 + minor, std::equal_to<int>()); |
|
|
|
|
return ::compareToSet(CUDA_ARCH_BIN, major * 10 + minor, std::equal_to<int>()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CV_EXPORTS bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) |
|
|
|
|
{ |
|
|
|
|
return ::compare(CUDA_ARCH_PTX, major * 10 + minor,
|
|
|
|
|
return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor,
|
|
|
|
|
std::less_equal<int>()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -113,14 +113,14 @@ CV_EXPORTS bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) |
|
|
|
|
|
|
|
|
|
CV_EXPORTS bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) |
|
|
|
|
{ |
|
|
|
|
return ::compare(CUDA_ARCH_PTX, major * 10 + minor,
|
|
|
|
|
return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor,
|
|
|
|
|
std::greater_equal<int>()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CV_EXPORTS bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) |
|
|
|
|
{ |
|
|
|
|
return ::compare(CUDA_ARCH_BIN, major * 10 + minor,
|
|
|
|
|
return ::compareToSet(CUDA_ARCH_BIN, major * 10 + minor,
|
|
|
|
|
std::greater_equal<int>()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -128,16 +128,20 @@ CV_EXPORTS bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) |
|
|
|
|
#if !defined (HAVE_CUDA) |
|
|
|
|
|
|
|
|
|
CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount() { return 0; } |
|
|
|
|
CV_EXPORTS string cv::gpu::getDeviceName(int /*device*/) { throw_nogpu(); return 0; }
|
|
|
|
|
CV_EXPORTS void cv::gpu::setDevice(int /*device*/) { throw_nogpu(); }
|
|
|
|
|
CV_EXPORTS void cv::gpu::setDevice(int) { throw_nogpu(); }
|
|
|
|
|
CV_EXPORTS int cv::gpu::getDevice() { throw_nogpu(); return 0; }
|
|
|
|
|
CV_EXPORTS void cv::gpu::getComputeCapability(int /*device*/, int& /*major*/, int& /*minor*/) { throw_nogpu(); }
|
|
|
|
|
CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { throw_nogpu(); return 0; }
|
|
|
|
|
CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t& /*free*/, size_t& /*total*/) { throw_nogpu(); }
|
|
|
|
|
CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int /*device*/) { throw_nogpu(); return false; } |
|
|
|
|
CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int /*device*/) { throw_nogpu(); return false; } |
|
|
|
|
CV_EXPORTS bool cv::gpu::isCompatibleWith(int device) { throw_nogpu(); return false; } |
|
|
|
|
|
|
|
|
|
cv::gpu::DeviceInfo::DeviceInfo() { throw_nogpu(); } |
|
|
|
|
cv::gpu::DeviceInfo::DeviceInfo(int) { throw_nogpu(); } |
|
|
|
|
string cv::gpu::DeviceInfo::name() const { throw_nogpu(); return ""; } |
|
|
|
|
int cv::gpu::DeviceInfo::major() const { throw_nogpu(); return 0; } |
|
|
|
|
int cv::gpu::DeviceInfo::minor() const { throw_nogpu(); return 0; } |
|
|
|
|
int cv::gpu::DeviceInfo::multiProcessorCount() const { throw_nogpu(); return 0; } |
|
|
|
|
size_t cv::gpu::DeviceInfo::freeMemory() const { throw_nogpu(); return 0; } |
|
|
|
|
size_t cv::gpu::DeviceInfo::totalMemory() const { throw_nogpu(); return 0; } |
|
|
|
|
bool cv::gpu::DeviceInfo::has(cv::gpu::GpuFeature) const { throw_nogpu(); return false; } |
|
|
|
|
bool cv::gpu::DeviceInfo::isCompatible() const { throw_nogpu(); return false; } |
|
|
|
|
void cv::gpu::DeviceInfo::query() const { throw_nogpu(); } |
|
|
|
|
void cv::gpu::DeviceInfo::queryMemory(size_t, size_t) const { throw_nogpu(); } |
|
|
|
|
|
|
|
|
|
#else /* !defined (HAVE_CUDA) */ |
|
|
|
|
|
|
|
|
@ -149,14 +153,6 @@ CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount() |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CV_EXPORTS string cv::gpu::getDeviceName(int device) |
|
|
|
|
{ |
|
|
|
|
cudaDeviceProp prop; |
|
|
|
|
cudaSafeCall( cudaGetDeviceProperties( &prop, device) ); |
|
|
|
|
return prop.name; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CV_EXPORTS void cv::gpu::setDevice(int device) |
|
|
|
|
{ |
|
|
|
|
cudaSafeCall( cudaSetDevice( device ) ); |
|
|
|
@ -171,65 +167,68 @@ CV_EXPORTS int cv::gpu::getDevice() |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CV_EXPORTS void cv::gpu::getComputeCapability(int device, int& major, int& minor) |
|
|
|
|
size_t cv::gpu::DeviceInfo::freeMemory() const |
|
|
|
|
{ |
|
|
|
|
cudaDeviceProp prop;
|
|
|
|
|
cudaSafeCall( cudaGetDeviceProperties( &prop, device) ); |
|
|
|
|
|
|
|
|
|
major = prop.major; |
|
|
|
|
minor = prop.minor; |
|
|
|
|
size_t free_memory, total_memory; |
|
|
|
|
queryMemory(free_memory, total_memory); |
|
|
|
|
return free_memory; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CV_EXPORTS int cv::gpu::getNumberOfSMs(int device) |
|
|
|
|
size_t cv::gpu::DeviceInfo::totalMemory() const |
|
|
|
|
{ |
|
|
|
|
cudaDeviceProp prop; |
|
|
|
|
cudaSafeCall( cudaGetDeviceProperties( &prop, device ) ); |
|
|
|
|
return prop.multiProcessorCount; |
|
|
|
|
size_t free_memory, total_memory; |
|
|
|
|
queryMemory(free_memory, total_memory); |
|
|
|
|
return total_memory; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t& free, size_t& total) |
|
|
|
|
bool cv::gpu::DeviceInfo::has(cv::gpu::GpuFeature feature) const |
|
|
|
|
{ |
|
|
|
|
cudaSafeCall( cudaMemGetInfo( &free, &total ) ); |
|
|
|
|
if (feature == NATIVE_DOUBLE) |
|
|
|
|
return major() > 1 || (major() == 1 && minor() >= 3); |
|
|
|
|
if (feature == ATOMICS) |
|
|
|
|
return major() > 1 || (major() == 1 && minor() >= 1); |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int device) |
|
|
|
|
bool cv::gpu::DeviceInfo::isCompatible() const |
|
|
|
|
{ |
|
|
|
|
int major, minor; |
|
|
|
|
getComputeCapability(device, major, minor); |
|
|
|
|
return major > 1 || (major == 1 && minor >= 3); |
|
|
|
|
} |
|
|
|
|
// Check PTX compatibility
|
|
|
|
|
if (TargetArchs::hasEqualOrLessPtx(major(), minor())) |
|
|
|
|
return true; |
|
|
|
|
|
|
|
|
|
// Check BIN compatibility
|
|
|
|
|
for (int i = minor(); i >= 0; --i) |
|
|
|
|
if (TargetArchs::hasBin(major(), i)) |
|
|
|
|
return true; |
|
|
|
|
|
|
|
|
|
CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
|
|
|
|
|
{ |
|
|
|
|
int major, minor; |
|
|
|
|
getComputeCapability(device, major, minor); |
|
|
|
|
return major > 1 || (major == 1 && minor >= 1); |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CV_EXPORTS bool cv::gpu::isCompatibleWith(int device) |
|
|
|
|
void cv::gpu::DeviceInfo::query() |
|
|
|
|
{ |
|
|
|
|
// According to the CUDA C Programming Guide Version 3.2: "PTX code
|
|
|
|
|
// produced for some specific compute capability can always be compiled to
|
|
|
|
|
// binary code of greater or equal compute capability".
|
|
|
|
|
cudaDeviceProp prop; |
|
|
|
|
cudaSafeCall(cudaGetDeviceProperties(&prop, device_id_)); |
|
|
|
|
name_ = prop.name; |
|
|
|
|
multi_processor_count_ = prop.multiProcessorCount; |
|
|
|
|
major_ = prop.major; |
|
|
|
|
minor_ = prop.minor; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
int major, minor; |
|
|
|
|
getComputeCapability(device, major, minor); |
|
|
|
|
|
|
|
|
|
// Check PTX compatibility
|
|
|
|
|
if (TargetArchs::hasEqualOrLessPtx(major, minor)) |
|
|
|
|
return true; |
|
|
|
|
void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory) const |
|
|
|
|
{ |
|
|
|
|
int prev_device_id = getDevice(); |
|
|
|
|
if (prev_device_id != device_id_) |
|
|
|
|
setDevice(device_id_); |
|
|
|
|
|
|
|
|
|
// Check CUBIN compatibility
|
|
|
|
|
for (int i = minor; i >= 0; --i) |
|
|
|
|
if (TargetArchs::hasBin(major, i)) |
|
|
|
|
return true; |
|
|
|
|
cudaSafeCall(cudaMemGetInfo(&free_memory, &total_memory)); |
|
|
|
|
|
|
|
|
|
return false; |
|
|
|
|
if (prev_device_id != device_id_) |
|
|
|
|
setDevice(prev_device_id); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif |
|
|
|
|