CUDA related func tables refactored to remove unneeded dependencies.

11 years ago · 64c94cb22c
parent 6da7c50fb5
commit 64c94cb22c
2 changed files with 204 additions and 210 deletions
--- a/modules/core/src/gpumat.cpp
+++ b/modules/core/src/gpumat.cpp
@@ -239,23 +239,23 @@ static DeviceInfoFuncTable* deviceInfoFuncTable()

 //////////////////////////////// Initialization & Info ////////////////////////

-int cv::gpu::getCudaEnabledDeviceCount() { return gpuFuncTable()->getCudaEnabledDeviceCount(); }
+int cv::gpu::getCudaEnabledDeviceCount() { return deviceInfoFuncTable()->getCudaEnabledDeviceCount(); }

-void cv::gpu::setDevice(int device) { gpuFuncTable()->setDevice(device); }
-int cv::gpu::getDevice() { return gpuFuncTable()->getDevice(); }
+void cv::gpu::setDevice(int device) { deviceInfoFuncTable()->setDevice(device); }
+int cv::gpu::getDevice() { return deviceInfoFuncTable()->getDevice(); }

-void cv::gpu::resetDevice() { gpuFuncTable()->resetDevice(); }
+void cv::gpu::resetDevice() { deviceInfoFuncTable()->resetDevice(); }

-bool cv::gpu::deviceSupports(FeatureSet feature_set) { return gpuFuncTable()->deviceSupports(feature_set); }
+bool cv::gpu::deviceSupports(FeatureSet feature_set) { return deviceInfoFuncTable()->deviceSupports(feature_set); }

-bool cv::gpu::TargetArchs::builtWith(FeatureSet feature_set) { return gpuFuncTable()->builtWith(feature_set); }
-bool cv::gpu::TargetArchs::has(int major, int minor) { return gpuFuncTable()->has(major, minor); }
-bool cv::gpu::TargetArchs::hasPtx(int major, int minor) {  return gpuFuncTable()->hasPtx(major, minor); }
-bool cv::gpu::TargetArchs::hasBin(int major, int minor) { return gpuFuncTable()->hasBin(major, minor);  }
-bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) { return gpuFuncTable()->hasEqualOrLessPtx(major, minor); }
-bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { return gpuFuncTable()->hasEqualOrGreater(major, minor); }
-bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { return gpuFuncTable()->hasEqualOrGreaterPtx(major, minor); }
-bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { return gpuFuncTable()->hasEqualOrGreaterBin(major, minor); }
+bool cv::gpu::TargetArchs::builtWith(FeatureSet feature_set) { return deviceInfoFuncTable()->builtWith(feature_set); }
+bool cv::gpu::TargetArchs::has(int major, int minor) { return deviceInfoFuncTable()->has(major, minor); }
+bool cv::gpu::TargetArchs::hasPtx(int major, int minor) {  return deviceInfoFuncTable()->hasPtx(major, minor); }
+bool cv::gpu::TargetArchs::hasBin(int major, int minor) { return deviceInfoFuncTable()->hasBin(major, minor);  }
+bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrLessPtx(major, minor); }
+bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreater(major, minor); }
+bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreaterPtx(major, minor); }
+bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreaterBin(major, minor); }

 size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return deviceInfoFuncTable()->sharedMemPerBlock(); }
 void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { deviceInfoFuncTable()->queryMemory(total_memory, free_memory); }
@@ -270,8 +270,8 @@ std::string cv::gpu::DeviceInfo::name() const { return deviceInfoFuncTable()->na
 int cv::gpu::DeviceInfo::multiProcessorCount() const { return deviceInfoFuncTable()->multiProcessorCount(); }
 void cv::gpu::DeviceInfo::query() { deviceInfoFuncTable()->query(); }

-void cv::gpu::printCudaDeviceInfo(int device) { gpuFuncTable()->printCudaDeviceInfo(device); }
-void cv::gpu::printShortCudaDeviceInfo(int device) { gpuFuncTable()->printShortCudaDeviceInfo(device); }
+void cv::gpu::printCudaDeviceInfo(int device) { deviceInfoFuncTable()->printCudaDeviceInfo(device); }
+void cv::gpu::printShortCudaDeviceInfo(int device) { deviceInfoFuncTable()->printShortCudaDeviceInfo(device); }

 #ifdef HAVE_CUDA

--- a/modules/core/src/gpumat_cuda.hpp
+++ b/modules/core/src/gpumat_cuda.hpp
@@ -4,6 +4,7 @@
    class DeviceInfoFuncTable
    {
    public:
+        // cv::DeviceInfo
        virtual size_t sharedMemPerBlock() const = 0;
        virtual void queryMemory(size_t&, size_t&) const = 0;
        virtual size_t freeMemory() const = 0;
@@ -16,25 +17,13 @@
        virtual int majorVersion() const = 0;
        virtual int minorVersion() const = 0;
        virtual int multiProcessorCount() const = 0;
-        virtual ~DeviceInfoFuncTable() {};
-    };
-    
-    class GpuFuncTable
-    {
-    public:
-        virtual ~GpuFuncTable() {}
-
-        // DeviceInfo routines
        virtual int getCudaEnabledDeviceCount() const = 0;
-
        virtual void setDevice(int) const = 0;
        virtual int getDevice() const = 0;
-
        virtual void resetDevice() const  = 0;
-
        virtual bool deviceSupports(FeatureSet) const = 0;

-        // TargetArchs
+        // cv::TargetArchs
        virtual bool builtWith(FeatureSet) const = 0;
        virtual bool has(int, int) const = 0;
        virtual bool hasPtx(int, int) const = 0;
@@ -47,6 +36,14 @@
        virtual void printCudaDeviceInfo(int) const = 0;
        virtual void printShortCudaDeviceInfo(int) const = 0;

+        virtual ~DeviceInfoFuncTable() {};
+    };
+
+    class GpuFuncTable
+    {
+    public:
+        virtual ~GpuFuncTable() {}
+
        // GpuMat routines
        virtual void copy(const Mat& src, GpuMat& dst) const = 0;
        virtual void copy(const GpuMat& src, Mat& dst) const = 0;
@@ -80,13 +77,7 @@
        int majorVersion() const { throw_nogpu; return -1; }
        int minorVersion() const { throw_nogpu; return -1; }
        int multiProcessorCount() const { throw_nogpu; return -1; }
-    };

-    class EmptyFuncTable : public GpuFuncTable
-    {
-    public:
-        
-        // DeviceInfo routines
        int getCudaEnabledDeviceCount() const { return 0; }

        void setDevice(int) const { throw_nogpu; }
@@ -107,6 +98,11 @@

        void printCudaDeviceInfo(int) const { throw_nogpu; }
        void printShortCudaDeviceInfo(int) const { throw_nogpu; }
+    };
+
+    class EmptyFuncTable : public GpuFuncTable
+    {
+    public:

        void copy(const Mat&, GpuMat&) const { throw_nogpu; }
        void copy(const GpuMat&, Mat&) const { throw_nogpu; }
@@ -568,12 +564,12 @@ namespace cv { namespace gpu { namespace device
        bool isCompatible() const
        {
            // Check PTX compatibility
-            if (TargetArchs::hasEqualOrLessPtx(majorVersion_, minorVersion_))
+            if (hasEqualOrLessPtx(majorVersion_, minorVersion_))
                return true;

            // Check BIN compatibility
                for (int i = minorVersion_; i >= 0; --i)
-                    if (TargetArchs::hasBin(majorVersion_, i))
+                    if (hasBin(majorVersion_, i))
                        return true;

                    return false;
@@ -614,44 +610,6 @@ namespace cv { namespace gpu { namespace device
            return multi_processor_count_;
        }

-    private:
-        int device_id_;
-        
-        std::string name_;
-        int multi_processor_count_;
-        int majorVersion_;
-        int minorVersion_;
-    };
-    
-    class CudaFuncTable : public GpuFuncTable
-    {
-    protected:
-              
-        const CudaArch cudaArch;
-
-        int convertSMVer2Cores(int major, int minor) const
-        {
-            // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
-            typedef struct {
-                int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
-                int Cores;
-            } SMtoCores;
-            
-            SMtoCores gpuArchCoresPerSM[] =  { { 0x10,  8 }, { 0x11,  8 }, { 0x12,  8 }, { 0x13,  8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 }  };
-            
-            int index = 0;
-            while (gpuArchCoresPerSM[index].SM != -1)
-            {
-                if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) )
-                    return gpuArchCoresPerSM[index].Cores;
-                index++;
-            }
-            
-            return -1;
-        }
-        
-    public:
-
        int getCudaEnabledDeviceCount() const
        {
            int count;
@@ -859,6 +817,42 @@ namespace cv { namespace gpu { namespace device
            fflush(stdout);
        }

+    private:
+        int device_id_;
+
+        std::string name_;
+        int multi_processor_count_;
+        int majorVersion_;
+        int minorVersion_;
+
+        const CudaArch cudaArch;
+
+        int convertSMVer2Cores(int major, int minor) const
+        {
+            // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
+            typedef struct {
+                int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
+                int Cores;
+            } SMtoCores;
+
+            SMtoCores gpuArchCoresPerSM[] =  { { 0x10,  8 }, { 0x11,  8 }, { 0x12,  8 }, { 0x13,  8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 }  };
+
+            int index = 0;
+            while (gpuArchCoresPerSM[index].SM != -1)
+            {
+                if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) )
+                    return gpuArchCoresPerSM[index].Cores;
+                index++;
+            }
+
+            return -1;
+        }
+    };
+
+    class CudaFuncTable : public GpuFuncTable
+    {
+    public:
+
        void copy(const Mat& src, GpuMat& dst) const
        {
            cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );