|
|
|
@ -4,6 +4,7 @@ |
|
|
|
|
class DeviceInfoFuncTable |
|
|
|
|
{ |
|
|
|
|
public: |
|
|
|
|
// cv::DeviceInfo
|
|
|
|
|
virtual size_t sharedMemPerBlock() const = 0; |
|
|
|
|
virtual void queryMemory(size_t&, size_t&) const = 0; |
|
|
|
|
virtual size_t freeMemory() const = 0; |
|
|
|
@ -16,25 +17,13 @@ |
|
|
|
|
virtual int majorVersion() const = 0; |
|
|
|
|
virtual int minorVersion() const = 0; |
|
|
|
|
virtual int multiProcessorCount() const = 0; |
|
|
|
|
virtual ~DeviceInfoFuncTable() {}; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
class GpuFuncTable |
|
|
|
|
{ |
|
|
|
|
public: |
|
|
|
|
virtual ~GpuFuncTable() {} |
|
|
|
|
|
|
|
|
|
// DeviceInfo routines
|
|
|
|
|
virtual int getCudaEnabledDeviceCount() const = 0; |
|
|
|
|
|
|
|
|
|
virtual void setDevice(int) const = 0; |
|
|
|
|
virtual int getDevice() const = 0; |
|
|
|
|
|
|
|
|
|
virtual void resetDevice() const = 0; |
|
|
|
|
|
|
|
|
|
virtual bool deviceSupports(FeatureSet) const = 0; |
|
|
|
|
|
|
|
|
|
// TargetArchs
|
|
|
|
|
// cv::TargetArchs
|
|
|
|
|
virtual bool builtWith(FeatureSet) const = 0; |
|
|
|
|
virtual bool has(int, int) const = 0; |
|
|
|
|
virtual bool hasPtx(int, int) const = 0; |
|
|
|
@ -47,6 +36,14 @@ |
|
|
|
|
virtual void printCudaDeviceInfo(int) const = 0; |
|
|
|
|
virtual void printShortCudaDeviceInfo(int) const = 0; |
|
|
|
|
|
|
|
|
|
virtual ~DeviceInfoFuncTable() {}; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
class GpuFuncTable |
|
|
|
|
{ |
|
|
|
|
public: |
|
|
|
|
virtual ~GpuFuncTable() {} |
|
|
|
|
|
|
|
|
|
// GpuMat routines
|
|
|
|
|
virtual void copy(const Mat& src, GpuMat& dst) const = 0; |
|
|
|
|
virtual void copy(const GpuMat& src, Mat& dst) const = 0; |
|
|
|
@ -80,13 +77,7 @@ |
|
|
|
|
int majorVersion() const { throw_nogpu; return -1; } |
|
|
|
|
int minorVersion() const { throw_nogpu; return -1; } |
|
|
|
|
int multiProcessorCount() const { throw_nogpu; return -1; } |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
class EmptyFuncTable : public GpuFuncTable |
|
|
|
|
{ |
|
|
|
|
public: |
|
|
|
|
|
|
|
|
|
// DeviceInfo routines
|
|
|
|
|
int getCudaEnabledDeviceCount() const { return 0; } |
|
|
|
|
|
|
|
|
|
void setDevice(int) const { throw_nogpu; } |
|
|
|
@ -107,6 +98,11 @@ |
|
|
|
|
|
|
|
|
|
void printCudaDeviceInfo(int) const { throw_nogpu; } |
|
|
|
|
void printShortCudaDeviceInfo(int) const { throw_nogpu; } |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
class EmptyFuncTable : public GpuFuncTable |
|
|
|
|
{ |
|
|
|
|
public: |
|
|
|
|
|
|
|
|
|
void copy(const Mat&, GpuMat&) const { throw_nogpu; } |
|
|
|
|
void copy(const GpuMat&, Mat&) const { throw_nogpu; } |
|
|
|
@ -568,12 +564,12 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
bool isCompatible() const |
|
|
|
|
{ |
|
|
|
|
// Check PTX compatibility
|
|
|
|
|
if (TargetArchs::hasEqualOrLessPtx(majorVersion_, minorVersion_)) |
|
|
|
|
if (hasEqualOrLessPtx(majorVersion_, minorVersion_)) |
|
|
|
|
return true; |
|
|
|
|
|
|
|
|
|
// Check BIN compatibility
|
|
|
|
|
for (int i = minorVersion_; i >= 0; --i) |
|
|
|
|
if (TargetArchs::hasBin(majorVersion_, i)) |
|
|
|
|
if (hasBin(majorVersion_, i)) |
|
|
|
|
return true; |
|
|
|
|
|
|
|
|
|
return false; |
|
|
|
@ -614,44 +610,6 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
return multi_processor_count_; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private: |
|
|
|
|
int device_id_; |
|
|
|
|
|
|
|
|
|
std::string name_; |
|
|
|
|
int multi_processor_count_; |
|
|
|
|
int majorVersion_; |
|
|
|
|
int minorVersion_; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
class CudaFuncTable : public GpuFuncTable |
|
|
|
|
{ |
|
|
|
|
protected: |
|
|
|
|
|
|
|
|
|
const CudaArch cudaArch; |
|
|
|
|
|
|
|
|
|
// Returns the number of CUDA cores per streaming multiprocessor (SM) for the
// compute capability major.minor, or -1 when that version is not in the table.
//
// major, minor - the two components of the SM version; each must fit in one
//                hexadecimal digit (0..15), otherwise -1 is returned.
//
// The function reads no instance state, so it is declared static. Existing call
// sites, including those inside const member functions, keep working unchanged.
static int convertSMVer2Cores(int major, int minor)
{
    // Associates an SM version with the number of cores per SM.
    struct SMtoCores
    {
        int SM;    // 0xMm (hexadecimal notation): M = SM major version, m = SM minor version
        int Cores; // cores per multiprocessor for that SM version
    };

    // Immutable lookup table, initialised once rather than rebuilt on every call.
    static const SMtoCores gpuArchCoresPerSM[] =
    {
        { 0x10,   8 }, { 0x11,   8 }, { 0x12,   8 }, { 0x13,   8 },
        { 0x20,  32 }, { 0x21,  48 },
        { 0x30, 192 }, { 0x35, 192 }
    };
    const SMtoCores* const tableEnd =
        gpuArchCoresPerSM + sizeof(gpuArchCoresPerSM) / sizeof(gpuArchCoresPerSM[0]);

    // Each component occupies exactly one hex digit of the key. Rejecting anything
    // outside 0..15 keeps inputs such as (1, 16) from aliasing the key 0x20 and
    // avoids left-shifting a negative value.
    if (major < 0 || major > 0xF || minor < 0 || minor > 0xF)
        return -1;

    const int smVersion = (major << 4) + minor;

    for (const SMtoCores* entry = gpuArchCoresPerSM; entry != tableEnd; ++entry)
    {
        if (entry->SM == smVersion)
            return entry->Cores;
    }

    // Unknown architecture.
    return -1;
}
|
|
|
|
|
|
|
|
|
public: |
|
|
|
|
|
|
|
|
|
int getCudaEnabledDeviceCount() const |
|
|
|
|
{ |
|
|
|
|
int count; |
|
|
|
@ -859,6 +817,42 @@ namespace cv { namespace gpu { namespace device |
|
|
|
|
fflush(stdout); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private: |
|
|
|
|
int device_id_; |
|
|
|
|
|
|
|
|
|
std::string name_; |
|
|
|
|
int multi_processor_count_; |
|
|
|
|
int majorVersion_; |
|
|
|
|
int minorVersion_; |
|
|
|
|
|
|
|
|
|
const CudaArch cudaArch; |
|
|
|
|
|
|
|
|
|
// Looks up how many CUDA cores each streaming multiprocessor (SM) has for the
// compute capability major.minor. Returns -1 if the version is not listed.
//
// major, minor - SM version components; both must lie in 0..15 (one hexadecimal
//                digit each), otherwise the result is -1.
//
// Declared static because no member data is read; calls made from const member
// functions continue to compile as before.
static int convertSMVer2Cores(int major, int minor)
{
    // One table row: an SM version and its core count per SM.
    struct SMtoCores
    {
        int SM;    // 0xMm (hexadecimal notation): M = SM major version, m = SM minor version
        int Cores; // cores per multiprocessor for that SM version
    };

    // Constant table, created once instead of on each invocation.
    static const SMtoCores gpuArchCoresPerSM[] =
    {
        { 0x10,   8 }, { 0x11,   8 }, { 0x12,   8 }, { 0x13,   8 },
        { 0x20,  32 }, { 0x21,  48 },
        { 0x30, 192 }, { 0x35, 192 }
    };
    const int entryCount =
        static_cast<int>(sizeof(gpuArchCoresPerSM) / sizeof(gpuArchCoresPerSM[0]));

    // The key packs major and minor into one hex digit each. Values outside 0..15
    // would either collide with another key (e.g. (1, 16) == 0x20) or require
    // shifting a negative number, so they are treated as unknown.
    const bool majorValid = (major >= 0) && (major <= 0xF);
    const bool minorValid = (minor >= 0) && (minor <= 0xF);
    if (!majorValid || !minorValid)
        return -1;

    const int key = (major << 4) + minor;

    for (int i = 0; i < entryCount; ++i)
    {
        if (gpuArchCoresPerSM[i].SM == key)
            return gpuArchCoresPerSM[i].Cores;
    }

    // No matching architecture in the table.
    return -1;
}
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
class CudaFuncTable : public GpuFuncTable |
|
|
|
|
{ |
|
|
|
|
public: |
|
|
|
|
|
|
|
|
|
void copy(const Mat& src, GpuMat& dst) const |
|
|
|
|
{ |
|
|
|
|
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) ); |
|
|
|
|