From 4175916b2a5b25789debdb7f79bc14abf039f5de Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Fri, 27 Dec 2013 17:19:38 +0400 Subject: [PATCH 1/2] dynamicuda became a private module. --- modules/dynamicuda/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/dynamicuda/CMakeLists.txt b/modules/dynamicuda/CMakeLists.txt index b523bf0fd1..75ace872a3 100644 --- a/modules/dynamicuda/CMakeLists.txt +++ b/modules/dynamicuda/CMakeLists.txt @@ -9,7 +9,7 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wshadow) ocv_module_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include") set(OPENCV_MODULE_TYPE SHARED) if (BUILD_FAT_JAVA_LIB) - ocv_define_module(dynamicuda opencv_java PRIVATE_REQUIRED ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) + ocv_define_module(dynamicuda INTERNAL opencv_java PRIVATE_REQUIRED ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) else() - ocv_define_module(dynamicuda opencv_core PRIVATE_REQUIRED ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) + ocv_define_module(dynamicuda INTERNAL opencv_core PRIVATE_REQUIRED ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) endif() From df63060e4d7c132f26b9601867240eb779534f0c Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Fri, 27 Dec 2013 16:49:26 +0400 Subject: [PATCH 2/2] Bugfix for DeviceInfoFuncTable in dynamicuda and core modules. 
--- modules/core/src/gpumat.cpp | 21 ++- .../include/opencv2/dynamicuda/dynamicuda.hpp | 126 ++++++++---------- 2 files changed, 62 insertions(+), 85 deletions(-) diff --git a/modules/core/src/gpumat.cpp b/modules/core/src/gpumat.cpp index 5dae4697d3..ec26801ddc 100644 --- a/modules/core/src/gpumat.cpp +++ b/modules/core/src/gpumat.cpp @@ -279,20 +279,19 @@ bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { return devi bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreaterPtx(major, minor); } bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreaterBin(major, minor); } -size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return deviceInfoFuncTable()->sharedMemPerBlock(); } -void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { deviceInfoFuncTable()->queryMemory(total_memory, free_memory); } -size_t cv::gpu::DeviceInfo::freeMemory() const { return deviceInfoFuncTable()->freeMemory(); } -size_t cv::gpu::DeviceInfo::totalMemory() const { return deviceInfoFuncTable()->totalMemory(); } -bool cv::gpu::DeviceInfo::supports(FeatureSet feature_set) const { return deviceInfoFuncTable()->supports(feature_set); } -bool cv::gpu::DeviceInfo::isCompatible() const { return deviceInfoFuncTable()->isCompatible(); } +size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return deviceInfoFuncTable()->sharedMemPerBlock(device_id_); } +void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { deviceInfoFuncTable()->queryMemory(device_id_, total_memory, free_memory); } +size_t cv::gpu::DeviceInfo::freeMemory() const { return deviceInfoFuncTable()->freeMemory(device_id_); } +size_t cv::gpu::DeviceInfo::totalMemory() const { return deviceInfoFuncTable()->totalMemory(device_id_); } +bool cv::gpu::DeviceInfo::supports(FeatureSet feature_set) const { return 
deviceInfoFuncTable()->supports(device_id_, feature_set); } +bool cv::gpu::DeviceInfo::isCompatible() const { return deviceInfoFuncTable()->isCompatible(device_id_); } void cv::gpu::DeviceInfo::query() { - deviceInfoFuncTable()->query(); - name_ = deviceInfoFuncTable()->name(); - multi_processor_count_ = deviceInfoFuncTable()->multiProcessorCount(); - majorVersion_ = deviceInfoFuncTable()->majorVersion(); - minorVersion_ = deviceInfoFuncTable()->minorVersion(); + name_ = deviceInfoFuncTable()->name(device_id_); + multi_processor_count_ = deviceInfoFuncTable()->multiProcessorCount(device_id_); + majorVersion_ = deviceInfoFuncTable()->majorVersion(device_id_); + minorVersion_ = deviceInfoFuncTable()->minorVersion(device_id_); } void cv::gpu::printCudaDeviceInfo(int device) { deviceInfoFuncTable()->printCudaDeviceInfo(device); } diff --git a/modules/dynamicuda/include/opencv2/dynamicuda/dynamicuda.hpp b/modules/dynamicuda/include/opencv2/dynamicuda/dynamicuda.hpp index 8973c53049..d4d0220e00 100644 --- a/modules/dynamicuda/include/opencv2/dynamicuda/dynamicuda.hpp +++ b/modules/dynamicuda/include/opencv2/dynamicuda/dynamicuda.hpp @@ -9,18 +9,17 @@ class DeviceInfoFuncTable { public: // cv::DeviceInfo - virtual size_t sharedMemPerBlock() const = 0; - virtual void queryMemory(size_t&, size_t&) const = 0; - virtual size_t freeMemory() const = 0; - virtual size_t totalMemory() const = 0; - virtual bool supports(FeatureSet) const = 0; - virtual bool isCompatible() const = 0; - virtual void query() = 0; - virtual int deviceID() const = 0; - virtual std::string name() const = 0; - virtual int majorVersion() const = 0; - virtual int minorVersion() const = 0; - virtual int multiProcessorCount() const = 0; + virtual size_t sharedMemPerBlock(int id) const = 0; + virtual void queryMemory(int id, size_t&, size_t&) const = 0; + virtual size_t freeMemory(int id) const = 0; + virtual size_t totalMemory(int id) const = 0; + virtual bool supports(int id, FeatureSet) const = 0; + 
virtual bool isCompatible(int id) const = 0; + virtual std::string name(int id) const = 0; + virtual int majorVersion(int id) const = 0; + virtual int minorVersion(int id) const = 0; + virtual int multiProcessorCount(int id) const = 0; + virtual int getCudaEnabledDeviceCount() const = 0; virtual void setDevice(int) const = 0; virtual int getDevice() const = 0; @@ -46,8 +45,6 @@ public: class GpuFuncTable { public: - virtual ~GpuFuncTable() {} - // GpuMat routines virtual void copy(const Mat& src, GpuMat& dst) const = 0; virtual void copy(const GpuMat& src, Mat& dst) const = 0; @@ -64,23 +61,23 @@ public: virtual void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const = 0; virtual void free(void* devPtr) const = 0; + + virtual ~GpuFuncTable() {} }; class EmptyDeviceInfoFuncTable: public DeviceInfoFuncTable { public: - size_t sharedMemPerBlock() const { throw_nogpu; return 0; } - void queryMemory(size_t&, size_t&) const { throw_nogpu; } - size_t freeMemory() const { throw_nogpu; return 0; } - size_t totalMemory() const { throw_nogpu; return 0; } - bool supports(FeatureSet) const { throw_nogpu; return false; } - bool isCompatible() const { throw_nogpu; return false; } - void query() { throw_nogpu; } - int deviceID() const { throw_nogpu; return -1; }; - std::string name() const { throw_nogpu; return std::string(); } - int majorVersion() const { throw_nogpu; return -1; } - int minorVersion() const { throw_nogpu; return -1; } - int multiProcessorCount() const { throw_nogpu; return -1; } + size_t sharedMemPerBlock(int) const { throw_nogpu; return 0; } + void queryMemory(int, size_t&, size_t&) const { throw_nogpu; } + size_t freeMemory(int) const { throw_nogpu; return 0; } + size_t totalMemory(int) const { throw_nogpu; return 0; } + bool supports(int, FeatureSet) const { throw_nogpu; return false; } + bool isCompatible(int) const { throw_nogpu; return false; } + std::string name(int) const { throw_nogpu; return std::string(); } + int 
majorVersion(int) const { throw_nogpu; return -1; } + int minorVersion(int) const { throw_nogpu; return -1; } + int multiProcessorCount(int) const { throw_nogpu; return -1; } int getCudaEnabledDeviceCount() const { return 0; } @@ -538,94 +535,84 @@ private: }; DeviceProps deviceProps; +const CudaArch cudaArch; class CudaDeviceInfoFuncTable : public DeviceInfoFuncTable { public: - size_t sharedMemPerBlock() const + size_t sharedMemPerBlock(int id) const { - return deviceProps.get(device_id_)->sharedMemPerBlock; + return deviceProps.get(id)->sharedMemPerBlock; } - void queryMemory(size_t& _totalMemory, size_t& _freeMemory) const + void queryMemory(int id, size_t& _totalMemory, size_t& _freeMemory) const { int prevDeviceID = getDevice(); - if (prevDeviceID != device_id_) - setDevice(device_id_); + if (prevDeviceID != id) + setDevice(id); cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) ); - if (prevDeviceID != device_id_) + if (prevDeviceID != id) setDevice(prevDeviceID); } - size_t freeMemory() const + size_t freeMemory(int id) const { size_t _totalMemory, _freeMemory; - queryMemory(_totalMemory, _freeMemory); + queryMemory(id, _totalMemory, _freeMemory); return _freeMemory; } - size_t totalMemory() const + size_t totalMemory(int id) const { size_t _totalMemory, _freeMemory; - queryMemory(_totalMemory, _freeMemory); + queryMemory(id, _totalMemory, _freeMemory); return _totalMemory; } - bool supports(FeatureSet feature_set) const + bool supports(int id, FeatureSet feature_set) const { - int version = majorVersion_ * 10 + minorVersion_; + int version = majorVersion(id) * 10 + minorVersion(id); return version >= feature_set; } - bool isCompatible() const + bool isCompatible(int id) const { // Check PTX compatibility - if (hasEqualOrLessPtx(majorVersion_, minorVersion_)) + if (hasEqualOrLessPtx(majorVersion(id), minorVersion(id))) return true; // Check BIN compatibility - for (int i = minorVersion_; i >= 0; --i) - if (hasBin(majorVersion_, i)) + for (int i = 
minorVersion(id); i >= 0; --i) + if (hasBin(majorVersion(id), i)) return true; return false; } - void query() + std::string name(int id) const { - const cudaDeviceProp* prop = deviceProps.get(device_id_); - - name_ = prop->name; - multi_processor_count_ = prop->multiProcessorCount; - majorVersion_ = prop->major; - minorVersion_ = prop->minor; + const cudaDeviceProp* prop = deviceProps.get(id); + return prop->name; } - int deviceID() const + int majorVersion(int id) const { - return device_id_; + const cudaDeviceProp* prop = deviceProps.get(id); + return prop->major; } - std::string name() const + int minorVersion(int id) const { - return name_; + const cudaDeviceProp* prop = deviceProps.get(id); + return prop->minor; } - int majorVersion() const + int multiProcessorCount(int id) const { - return majorVersion_; - } - - int minorVersion() const - { - return minorVersion_; - } - - int multiProcessorCount() const - { - return multi_processor_count_; + const cudaDeviceProp* prop = deviceProps.get(id); + return prop->multiProcessorCount; } int getCudaEnabledDeviceCount() const @@ -836,15 +823,6 @@ public: } private: - int device_id_; - - std::string name_; - int multi_processor_count_; - int majorVersion_; - int minorVersion_; - - const CudaArch cudaArch; - int convertSMVer2Cores(int major, int minor) const { // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM