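bug fix: compile FAST GPU kernel bodies only for __CUDA_ARCH__ >= 110, assert GLOBAL_ATOMICS support in FAST_GPU, and copy only the first keyPointsCount_[level] columns in ORB_GPU::mergeKeyPoints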

14 years ago · eccfc90b77
parent f937d68d60
commit eccfc90b77
3 changed files with 12 additions and 1 deletion
--- a/modules/gpu/src/cuda/fast.cu
+++ b/modules/gpu/src/cuda/fast.cu
@@ -223,6 +223,8 @@ namespace cv { namespace gpu { namespace device
        template <bool calcScore, class Mask>
        __global__ void calcKeypoints(const DevMem2Db img, const Mask mask, short2* kpLoc, const unsigned int maxKeypoints, PtrStepi score, const int threshold)
        {
+            #if __CUDA_ARCH__ >= 110
+
            const int j = threadIdx.x + blockIdx.x * blockDim.x + 3;
            const int i = threadIdx.y + blockIdx.y * blockDim.y + 3;

@@ -276,6 +278,8 @@ namespace cv { namespace gpu { namespace device
                        kpLoc[ind] = make_short2(j, i);
                }
            }
+
+            #endif
        }

        int calcKeypoints_gpu(DevMem2Db img, DevMem2Db mask, short2* kpLoc, int maxKeypoints, DevMem2Di score, int threshold)
@@ -321,6 +325,8 @@ namespace cv { namespace gpu { namespace device

        __global__ void nonmaxSupression(const short2* kpLoc, int count, const DevMem2Di scoreMat, short2* locFinal, float* responseFinal)
        {
+            #if __CUDA_ARCH__ >= 110
+
            const int kpIdx = threadIdx.x + blockIdx.x * blockDim.x;

            if (kpIdx < count)
@@ -349,6 +355,8 @@ namespace cv { namespace gpu { namespace device
                    responseFinal[ind] = static_cast<float>(score);
                }
            }
+
+            #endif
        }

        int nonmaxSupression_gpu(const short2* kpLoc, int count, DevMem2Di score, short2* loc, float* response)
--- a/modules/gpu/src/fast.cpp
+++ b/modules/gpu/src/fast.cpp
@@ -124,6 +124,7 @@ int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& ma

    CV_Assert(img.type() == CV_8UC1);
    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
+    CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));

    int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());

@@ -145,6 +146,8 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
 {
    using namespace cv::gpu::device::fast;

+    CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
+
    if (count_ == 0)
        return 0;

--- a/modules/gpu/src/orb.cpp
+++ b/modules/gpu/src/orb.cpp
@@ -666,7 +666,7 @@ void cv::gpu::ORB_GPU::mergeKeyPoints(GpuMat& keypoints)
        mergeLocation_gpu(keyPointsPyr_[level].ptr<short2>(0), keyPointsRange.ptr<float>(0), keyPointsRange.ptr<float>(1), keyPointsCount_[level], locScale, 0);

        GpuMat range = keyPointsRange.rowRange(2, 4);
-        keyPointsPyr_[level].rowRange(1, 3).copyTo(range);
+        keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range);
        
        keyPointsRange.row(4).setTo(Scalar::all(level));
        keyPointsRange.row(5).setTo(Scalar::all(params_.patch_size_ * sf));