From 6389dfe49c3f834060b12355743efb4cf0b351cf Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 14 May 2019 12:08:21 +0300 Subject: [PATCH] Fixed DetectionOutput output blob shape --- .../dnn/src/layers/detection_output_layer.cpp | 49 ++++++------------- modules/dnn/src/layers/proposal_layer.cpp | 21 ++++---- modules/dnn/test/test_backends.cpp | 2 +- modules/dnn/test/test_caffe_importer.cpp | 4 +- 4 files changed, 28 insertions(+), 48 deletions(-) diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 5c413df00b..043be0e7b7 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -312,15 +312,13 @@ public: { std::vector inputs; std::vector outputs; + outs.getUMatVector(outputs); bool use_half = (inps.depth() == CV_16S); if (use_half) { std::vector orig_inputs; - std::vector orig_outputs; - inps.getUMatVector(orig_inputs); - outs.getUMatVector(orig_outputs); inputs.resize(orig_inputs.size()); for (size_t i = 0; i < orig_inputs.size(); i++) @@ -329,7 +327,6 @@ public: else { inps.getUMatVector(inputs); - outs.getUMatVector(outputs); } std::vector allDecodedBBoxes; @@ -362,19 +359,17 @@ public: if (numKept == 0) { - // Set confidences to zeros. - Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)}; - if (use_half) - { - std::vector orig_outputs; - outs.getUMatVector(orig_outputs); - orig_outputs[0](ranges).setTo(0); - } else - outputs[0](ranges).setTo(0); + outputs[0].setTo(0); return true; } - int outputShape[] = {1, 1, (int)numKept, 7}; - UMat umat = UMat(4, outputShape, CV_32F); + + UMat umat = use_half ? UMat::zeros(4, outputs[0].size, CV_32F) : outputs[0]; + + if (!use_half) + umat.setTo(0); + + // If there are valid detections + if (numKept > 0) { Mat mat = umat.getMat(ACCESS_WRITE); float* outputsData = mat.ptr(); @@ -393,16 +388,7 @@ public: { UMat half_umat; convertFp16(umat, half_umat); - - std::vector orig_outputs; - outs.getUMatVector(orig_outputs); - orig_outputs.clear(); - orig_outputs.push_back(half_umat); - outs.assign(orig_outputs); - } else { - outputs.clear(); - outputs.push_back(umat); - outs.assign(outputs); + outs.assign(std::vector(1, half_umat)); } return true; @@ -484,15 +470,12 @@ public: numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices); } + outputs[0].setTo(0); + + // If there is no detections if (numKept == 0) - { - // Set confidences to zeros. - Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)}; - outputs[0](ranges).setTo(0); return; - } - int outputShape[] = {1, 1, (int)numKept, 7}; - outputs[0].create(4, outputShape, CV_32F); + float* outputsData = outputs[0].ptr(); size_t count = 0; @@ -703,8 +686,6 @@ public: prior_width += 1.0f; prior_height += 1.0f; } - CV_Assert(prior_width > 0); - CV_Assert(prior_height > 0); float prior_center_x = prior_bbox.xmin + prior_width * .5; float prior_center_y = prior_bbox.ymin + prior_height * .5; diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp index 6514ed3a5c..836fc1831b 100644 --- a/modules/dnn/src/layers/proposal_layer.cpp +++ b/modules/dnn/src/layers/proposal_layer.cpp @@ -131,6 +131,9 @@ public: CV_Assert(layerInternals.empty()); internals.push_back(layerOutputs[0]); + // Detections layer. + internals.push_back(shape(1, 1, keepTopAfterNMS, 7)); + outputs.resize(2); outputs[0] = shape(keepTopAfterNMS, 5); outputs[1] = shape(keepTopAfterNMS, 1); @@ -176,13 +179,14 @@ public: internals_.getUMatVector(internals); CV_Assert(inputs.size() == 3); - CV_Assert(internals.size() == 3); + CV_Assert(internals.size() == 4); const UMat& scores = inputs[0]; const UMat& bboxDeltas = inputs[1]; const UMat& imInfo = inputs[2]; UMat& priorBoxes = internals[0]; UMat& permuttedScores = internals[1]; UMat& permuttedDeltas = internals[2]; + UMat& detections = internals[3]; CV_Assert(imInfo.total() >= 2); // We've chosen the smallest data type because we need just a shape from it. @@ -217,7 +221,7 @@ public: layerInputs[2] = priorBoxes; layerInputs[3] = umat_fakeImageBlob; - layerOutputs[0] = UMat(); + layerOutputs[0] = detections; detectionOutputLayer->forward(layerInputs, layerOutputs, internals); // DetectionOutputLayer produces 1x1xNx7 output where N might be less or @@ -237,10 +241,6 @@ public: dst = outputs[1].rowRange(0, numDets); layerOutputs[0].col(2).copyTo(dst); - if (numDets < keepTopAfterNMS) - for (int i = 0; i < 2; ++i) - outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0); - return true; } #endif @@ -266,13 +266,14 @@ public: internals_arr.getMatVector(internals); CV_Assert(inputs.size() == 3); - CV_Assert(internals.size() == 3); + CV_Assert(internals.size() == 4); const Mat& scores = inputs[0]; const Mat& bboxDeltas = inputs[1]; const Mat& imInfo = inputs[2]; Mat& priorBoxes = internals[0]; Mat& permuttedScores = internals[1]; Mat& permuttedDeltas = internals[2]; + Mat& detections = internals[3]; CV_Assert(imInfo.total() >= 2); // We've chosen the smallest data type because we need just a shape from it. @@ -302,7 +303,7 @@ public: layerInputs[2] = priorBoxes; layerInputs[3] = fakeImageBlob; - layerOutputs[0] = Mat(); + layerOutputs[0] = detections; detectionOutputLayer->forward(layerInputs, layerOutputs, internals); // DetectionOutputLayer produces 1x1xNx7 output where N might be less or @@ -319,10 +320,6 @@ public: // The scores. dst = outputs[1].rowRange(0, numDets); layerOutputs[0].col(2).copyTo(dst); - - if (numDets < keepTopAfterNMS) - for (int i = 0; i < 2; ++i) - outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0); } virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 015d4e6c7c..564c4986a5 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -172,7 +172,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); float diffScores = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1.5e-2 : 0.0; float diffSquares = (target == DNN_TARGET_MYRIAD) ? 0.063 : 0.0; - float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.252 : 0.0; + float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.252 : FLT_MIN; processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", inp, "detection_out", "", diffScores, diffSquares, detectionConfThresh); expectNoFallbacksFromIE(net); diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index caaadfd897..3b1cc02a85 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -204,7 +204,7 @@ TEST(Reproducibility_SSD, Accuracy) Mat out = net.forward("detection_out"); Mat ref = blobFromNPY(_tf("ssd_out.npy")); - normAssertDetections(ref, out); + normAssertDetections(ref, out, "", FLT_MIN); } typedef testing::TestWithParam > Reproducibility_MobileNet_SSD; @@ -225,6 +225,8 @@ TEST_P(Reproducibility_MobileNet_SSD, Accuracy) net.setInput(inp); Mat out = net.forward().clone(); + ASSERT_EQ(out.size[2], 100); + const float scores_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.5e-2 : 1e-5; const float boxes_iou_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 6.3e-2 : 1e-4; Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy"));