From 6389dfe49c3f834060b12355743efb4cf0b351cf Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev <dmitry.kurtaev+github@gmail.com>
Date: Tue, 14 May 2019 12:08:21 +0300
Subject: [PATCH] Fixed DetectionOutput output blob shape

---
 .../dnn/src/layers/detection_output_layer.cpp | 49 ++++++-------------
 modules/dnn/src/layers/proposal_layer.cpp     | 21 ++++----
 modules/dnn/test/test_backends.cpp            |  2 +-
 modules/dnn/test/test_caffe_importer.cpp      |  4 +-
 4 files changed, 28 insertions(+), 48 deletions(-)
diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp
index 5c413df00b..043be0e7b7 100644
--- a/modules/dnn/src/layers/detection_output_layer.cpp
+++ b/modules/dnn/src/layers/detection_output_layer.cpp
@@ -312,15 +312,13 @@ public:
     {
         std::vector<UMat> inputs;
         std::vector<UMat> outputs;
+        outs.getUMatVector(outputs);
 
         bool use_half = (inps.depth() == CV_16S);
         if (use_half)
         {
             std::vector<UMat> orig_inputs;
-            std::vector<UMat> orig_outputs;
-
             inps.getUMatVector(orig_inputs);
-            outs.getUMatVector(orig_outputs);
 
             inputs.resize(orig_inputs.size());
             for (size_t i = 0; i < orig_inputs.size(); i++)
@@ -329,7 +327,6 @@ public:
         else
         {
             inps.getUMatVector(inputs);
-            outs.getUMatVector(outputs);
         }
 
         std::vector<LabelBBox> allDecodedBBoxes;
@@ -362,19 +359,17 @@ public:
 
         if (numKept == 0)
         {
-            // Set confidences to zeros.
-            Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)};
-            if (use_half)
-            {
-                std::vector<UMat> orig_outputs;
-                outs.getUMatVector(orig_outputs);
-                orig_outputs[0](ranges).setTo(0);
-            } else
-                outputs[0](ranges).setTo(0);
+            outputs[0].setTo(0);
             return true;
         }
-        int outputShape[] = {1, 1, (int)numKept, 7};
-        UMat umat = UMat(4, outputShape, CV_32F);
+
+        UMat umat = use_half ? UMat::zeros(4, outputs[0].size, CV_32F) : outputs[0];
+
+        if (!use_half)
+            umat.setTo(0);
+
+        // If there are valid detections
+        if (numKept > 0)
         {
             Mat mat = umat.getMat(ACCESS_WRITE);
             float* outputsData = mat.ptr<float>();
@@ -393,16 +388,7 @@ public:
         {
             UMat half_umat;
             convertFp16(umat, half_umat);
-
-            std::vector<UMat> orig_outputs;
-            outs.getUMatVector(orig_outputs);
-            orig_outputs.clear();
-            orig_outputs.push_back(half_umat);
-            outs.assign(orig_outputs);
-        } else {
-            outputs.clear();
-            outputs.push_back(umat);
-            outs.assign(outputs);
+            outs.assign(std::vector<UMat>(1, half_umat));
         }
 
         return true;
@@ -484,15 +470,12 @@ public:
             numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices);
         }
 
+        outputs[0].setTo(0);
+
+        // If there are no detections
         if (numKept == 0)
-        {
-            // Set confidences to zeros.
-            Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)};
-            outputs[0](ranges).setTo(0);
             return;
-        }
-        int outputShape[] = {1, 1, (int)numKept, 7};
-        outputs[0].create(4, outputShape, CV_32F);
+
         float* outputsData = outputs[0].ptr<float>();
 
         size_t count = 0;
@@ -703,8 +686,6 @@ public:
                 prior_width += 1.0f;
                 prior_height += 1.0f;
             }
-            CV_Assert(prior_width > 0);
-            CV_Assert(prior_height > 0);
             float prior_center_x = prior_bbox.xmin + prior_width * .5;
             float prior_center_y = prior_bbox.ymin + prior_height * .5;
 
diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp
index 6514ed3a5c..836fc1831b 100644
--- a/modules/dnn/src/layers/proposal_layer.cpp
+++ b/modules/dnn/src/layers/proposal_layer.cpp
@@ -131,6 +131,9 @@ public:
         CV_Assert(layerInternals.empty());
         internals.push_back(layerOutputs[0]);
 
+        // Detections layer.
+        internals.push_back(shape(1, 1, keepTopAfterNMS, 7));
+
         outputs.resize(2);
         outputs[0] = shape(keepTopAfterNMS, 5);
         outputs[1] = shape(keepTopAfterNMS, 1);
@@ -176,13 +179,14 @@ public:
         internals_.getUMatVector(internals);
 
         CV_Assert(inputs.size() == 3);
-        CV_Assert(internals.size() == 3);
+        CV_Assert(internals.size() == 4);
         const UMat& scores = inputs[0];
         const UMat& bboxDeltas = inputs[1];
         const UMat& imInfo = inputs[2];
         UMat& priorBoxes = internals[0];
         UMat& permuttedScores = internals[1];
         UMat& permuttedDeltas = internals[2];
+        UMat& detections = internals[3];
 
         CV_Assert(imInfo.total() >= 2);
         // We've chosen the smallest data type because we need just a shape from it.
@@ -217,7 +221,7 @@ public:
         layerInputs[2] = priorBoxes;
         layerInputs[3] = umat_fakeImageBlob;
 
-        layerOutputs[0] = UMat();
+        layerOutputs[0] = detections;
         detectionOutputLayer->forward(layerInputs, layerOutputs, internals);
 
         // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
@@ -237,10 +241,6 @@ public:
         dst = outputs[1].rowRange(0, numDets);
         layerOutputs[0].col(2).copyTo(dst);
 
-        if (numDets < keepTopAfterNMS)
-            for (int i = 0; i < 2; ++i)
-                outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0);
-
         return true;
     }
 #endif
@@ -266,13 +266,14 @@ public:
         internals_arr.getMatVector(internals);
 
         CV_Assert(inputs.size() == 3);
-        CV_Assert(internals.size() == 3);
+        CV_Assert(internals.size() == 4);
         const Mat& scores = inputs[0];
         const Mat& bboxDeltas = inputs[1];
         const Mat& imInfo = inputs[2];
         Mat& priorBoxes = internals[0];
         Mat& permuttedScores = internals[1];
         Mat& permuttedDeltas = internals[2];
+        Mat& detections = internals[3];
 
         CV_Assert(imInfo.total() >= 2);
         // We've chosen the smallest data type because we need just a shape from it.
@@ -302,7 +303,7 @@ public:
         layerInputs[2] = priorBoxes;
         layerInputs[3] = fakeImageBlob;
 
-        layerOutputs[0] = Mat();
+        layerOutputs[0] = detections;
         detectionOutputLayer->forward(layerInputs, layerOutputs, internals);
 
         // DetectionOutputLayer produces 1x1xNx7 output where N might be less or
@@ -319,10 +320,6 @@ public:
         // The scores.
         dst = outputs[1].rowRange(0, numDets);
         layerOutputs[0].col(2).copyTo(dst);
-
-        if (numDets < keepTopAfterNMS)
-            for (int i = 0; i < 2; ++i)
-                outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0);
     }
 
     virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp
index 015d4e6c7c..564c4986a5 100644
--- a/modules/dnn/test/test_backends.cpp
+++ b/modules/dnn/test/test_backends.cpp
@@ -172,7 +172,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe)
     Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
     float diffScores = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1.5e-2 : 0.0;
     float diffSquares = (target == DNN_TARGET_MYRIAD) ? 0.063  : 0.0;
-    float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.252  : 0.0;
+    float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.252  : FLT_MIN;
          processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt",
                     inp, "detection_out", "", diffScores, diffSquares, detectionConfThresh);
     expectNoFallbacksFromIE(net);
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
index caaadfd897..3b1cc02a85 100644
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -204,7 +204,7 @@ TEST(Reproducibility_SSD, Accuracy)
     Mat out = net.forward("detection_out");
 
     Mat ref = blobFromNPY(_tf("ssd_out.npy"));
-    normAssertDetections(ref, out);
+    normAssertDetections(ref, out, "", FLT_MIN);
 }
 
 typedef testing::TestWithParam<tuple<Backend, Target> > Reproducibility_MobileNet_SSD;
@@ -225,6 +225,8 @@ TEST_P(Reproducibility_MobileNet_SSD, Accuracy)
     net.setInput(inp);
     Mat out = net.forward().clone();
 
+    ASSERT_EQ(out.size[2], 100);
+
     const float scores_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.5e-2 : 1e-5;
     const float boxes_iou_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 6.3e-2 : 1e-4;
     Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy"));