nms: part 1

12 years ago · d2e88e1d4d
parent a9f10e5cad
commit d2e88e1d4d
4 changed files with 95 additions and 8 deletions
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@ -1552,12 +1552,14 @@ public:
        enum {PEDESTRIAN = 0};
    };

+    enum { NO_REJECT = 1, DOLLAR = 2, /*PASCAL = 4,*/ DEFAULT = NO_REJECT};
+
    // An empty cascade will be created.
    // Param minScale is a minimum scale relative to the original size of the image on which cascade will be applied.
    // Param maxScale is a maximum scale relative to the original size of the image on which cascade will be applied.
    // Param scales is a number of scales from minScale to maxScale.
    // Param rejCriteria is used for NMS.
-    SCascade(const double minScale = 0.4, const double maxScale = 5., const int scales = 55, const int rejfactor = 1);
+    SCascade(const double minScale = 0.4, const double maxScale = 5., const int scales = 55, const int rejCriteria = 1);

    virtual ~SCascade();

@ -1595,7 +1597,7 @@ private:
    double maxScale;

    int scales;
-    int rejfactor;
+    int rejCriteria;
 };

 ////////////////////////////////// SURF //////////////////////////////////////////
--- a/modules/gpu/src/cuda/icf-sc.cu
+++ b/modules/gpu/src/cuda/icf-sc.cu
@ -41,9 +41,10 @@
 //M*/

 #include <opencv2/gpu/device/common.hpp>
+
 #include <icf.hpp>
-#include <stdio.h>
 #include <float.h>
+#include <stdio.h>

 namespace cv { namespace gpu { namespace device {
 namespace icf {
@ -79,6 +80,70 @@ namespace icf {
        }
    }

+    // Area of the intersection of the boxes (x, y, w, h) described by a and b.
+    // Yields 0 when the boxes are separated along either axis.
+    __device__ __forceinline__ float overlapArea(const Detection &a, const Detection &b)
+    {
+        const int commonW = ::min(a.x + a.w, b.x + b.w) - ::max(a.x, b.x);
+        const int commonH = ::min(a.y + a.h, b.y + b.h) - ::max(a.y, b.y);
+
+        // A negative extent means the boxes do not intersect on that axis.
+        if (commonW < 0 || commonH < 0)
+            return 0.f;
+
+        return (float)(commonW * commonH);
+    }
+
+    // Flags detections that lose a pairwise overlap test.
+    //
+    // n          - device pointer to the number of entries in detections.
+    // detections - candidate boxes together with their confidences.
+    // overlaps   - one flag per detection; set to 1 for the less confident member of each
+    //              compared pair whose overlap ratio exceeds 0.65. Flags are only ever set
+    //              here, never cleared, so the buffer must be zeroed before the launch.
+    //
+    // Only threadIdx.x is used for indexing, so the kernel is meant to run as a single block.
+    // The loop stride follows blockDim.x, so the block may have any size.
+    __global__ void overlap(const uint* n, const Detection* detections, uchar* overlaps)
+    {
+        const int idx = threadIdx.x;
+        const int stride = blockDim.x;
+        const int total = *n;
+
+        for (int i = idx; i < total; i += stride)
+        {
+            const Detection& a = detections[i];
+            bool excluded = false;
+
+            // Each unordered pair (i, j) with j > i is examined exactly once.
+            for (int j = i + 1; j < total; ++j)
+            {
+                const Detection& b = detections[j];
+
+                // Intersection area relative to the smaller of the two boxes.
+                float ovl = overlapArea(a, b) / ::min(a.w * a.h, b.w * b.h);
+
+                if (ovl > 0.65f)
+                {
+                    // The less confident box of the pair is suppressed; on a tie it is a.
+                    int suppressed = (a.confidence > b.confidence)? j : i;
+                    overlaps[suppressed] = 1;
+                    excluded = excluded || (suppressed == i);
+                }
+
+                // Early exit once every voting lane has had its own detection suppressed.
+                // NOTE(review): __all is the legacy mask-less warp vote and is evaluated inside
+                // a loop whose trip count differs per lane, so the set of voting lanes is not
+                // fixed. Toolchains that require the *_sync vote intrinsics need this ported;
+                // confirm the intended participant set before changing it.
+                if (__all(excluded)) break;
+            }
+        }
+    }
+
+    // Placeholder for gathering the detections that survived suppression.
+    //
+    // n          - device pointer to the number of entries in detections.
+    // detections - candidate boxes together with their confidences.
+    // overlaps   - one flag per detection; entries with a non-zero flag are skipped.
+    //
+    // Visits every detection whose flag is still 0 but does not write anything yet; the only
+    // consumer of the visited entry is the commented-out debug print.
+    // Only threadIdx.x is used for indexing, so the kernel is meant to run as a single block.
+    // The loop stride follows blockDim.x, so the block may have any size.
+    __global__ void collect(const uint* n, const Detection* detections, uchar* overlaps)
+    {
+        const int idx = threadIdx.x;
+        const int stride = blockDim.x;
+        const int total = *n;
+
+        for (int i = idx; i < total; i += stride)
+        {
+            if (!overlaps[i])
+            {
+                const Detection& det = detections[i];
+                (void)det; // referenced only by the debug print below
+                // printf("%d: %d %d %d %d %f\n", i, det.x, det.y, det.w, det.h, det.confidence );
+            }
+        }
+    }
+
+    // Host entry point for suppression: launches the overlap and collect kernels back to back
+    // without an explicit stream argument, then checks for errors and waits for the device.
+    //
+    // objects     - buffer whose first row is reinterpreted as an array of Detection.
+    // overlaps    - buffer whose first row is used as one byte flag per detection.
+    // ndetections - buffer whose first element is read on the device as the detection count.
+    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections)
+    {
+        const int threadsPerBlock = 192;
+        const int blocksPerGrid = 1;
+
+        uint* count = (uint*)ndetections.ptr(0);
+        Detection* boxes = (Detection*)objects.ptr(0);
+        uchar* flags = (uchar*)overlaps.ptr(0);
+
+        overlap<<<blocksPerGrid, threadsPerBlock>>>(count, boxes, flags);
+        collect<<<blocksPerGrid, threadsPerBlock>>>(count, boxes, flags);
+
+        // if (!stream)
+        cudaSafeCall( cudaGetLastError());
+        cudaSafeCall( cudaDeviceSynchronize());
+    }
+
    template<typename Policy>
    struct PrefixSum
    {
--- a/modules/gpu/src/gpu_init.cpp
+++ b/modules/gpu/src/gpu_init.cpp
@ -49,7 +49,7 @@ CV_INIT_ALGORITHM(SCascade, "CascadeDetector.SCascade",
                  obj.info()->addParam(obj, "minScale",    obj.minScale);
                  obj.info()->addParam(obj, "maxScale",    obj.maxScale);
                  obj.info()->addParam(obj, "scales",      obj.scales);
-                  obj.info()->addParam(obj, "rejfactor", obj.rejfactor));
+                  obj.info()->addParam(obj, "rejCriteria", obj.rejCriteria));

 bool initModule_gpu(void)
 {
--- a/modules/gpu/src/softcascade.cpp
+++ b/modules/gpu/src/softcascade.cpp
@ -85,6 +85,8 @@ namespace cv { namespace gpu { namespace device {
 namespace icf {
    void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
        const int fw, const int fh, const int bins, cudaStream_t stream);
+
+    void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections);
 }

 namespace imgproc {
@ -309,6 +311,8 @@ struct cv::gpu::SCascade::Fields
        hogluv.create((fh / shr) * HOG_LUV_BINS + 1, fw / shr + 1, CV_32SC1);
        hogluv.setTo(cv::Scalar::all(0));

+        overlaps.create(1, 5000, CV_8UC1);
+
        return true;
    }

@ -437,7 +441,15 @@ private:
        }
    }

+#include <iostream>
 public:
+    void suppress(GpuMat& ndetections, GpuMat& objects)
+    {
+        ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
+        overlaps.setTo(0);
+        device::icf::suppress(objects, overlaps, ndetections);
+        // std::cout << cv::Mat(overlaps) << std::endl;
+    }

    // scales range
    float minScale;
@ -469,6 +481,9 @@ public:
    // 161x121x10
    GpuMat hogluv;

+    // used for area overlap computing during non-maximum suppression
+    GpuMat overlaps;
+
    // Cascade from xml
    GpuMat octaves;
    GpuMat stages;
@ -478,6 +493,8 @@ public:

    GpuMat sobelBuf;

+    GpuMat collected;
+
    std::vector<device::icf::Octave> voctaves;

    DeviceInfo info;
@ -494,7 +511,7 @@ public:
 };

 cv::gpu::SCascade::SCascade(const double mins, const double maxs, const int sc, const int rjf)
-: fields(0),  minScale(mins), maxScale(maxs), scales(sc), rejfactor(rjf) {}
+: fields(0),  minScale(mins), maxScale(maxs), scales(sc), rejCriteria(rjf) {}

 cv::gpu::SCascade::~SCascade() { delete fields; }

@ -534,6 +551,9 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
    cudaStream_t stream = StreamAccessor::getStream(s);

    flds.detect(rois, tmp, objects, stream);
+
+    // if (rejCriteria != NO_REJECT)
+    flds.suppress(tmp, objects);
 }

 void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const