From 520e3514896823a758ae1c6a305566bd997660c8 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Wed, 24 Apr 2019 12:08:49 +0300 Subject: [PATCH] Fix batching in DetectionOutput layer --- .../dnn/src/layers/detection_output_layer.cpp | 7 +-- modules/dnn/test/test_caffe_importer.cpp | 54 +++++++++++-------- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index e095e72dfd..5c413df00b 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -206,8 +206,9 @@ public: std::vector &outputs, std::vector &internals) const CV_OVERRIDE { + const int num = inputs[0][0]; CV_Assert(inputs.size() >= 3); - CV_Assert(inputs[0][0] == inputs[1][0]); + CV_Assert(num == inputs[1][0]); int numPriors = inputs[2][2] / 4; CV_Assert((numPriors * _numLocClasses * 4) == total(inputs[0], 1)); @@ -216,10 +217,10 @@ public: // num() and channels() are 1. // Since the number of bboxes to be kept is unknown before nms, we manually - // set it to maximal number of detections, [keep_top_k] parameter. + // set it to maximal number of detections, [keep_top_k] parameter multiplied by batch size. // Each row is a 7 dimension std::vector, which stores // [image_id, label, confidence, xmin, ymin, xmax, ymax] - outputs.resize(1, shape(1, 1, _keepTopK, 7)); + outputs.resize(1, shape(1, 1, _keepTopK * num, 7)); return false; } diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index b73aa43bad..dc981233f1 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -207,60 +207,72 @@ TEST(Reproducibility_SSD, Accuracy) normAssertDetections(ref, out); } -typedef testing::TestWithParam Reproducibility_MobileNet_SSD; +typedef testing::TestWithParam > Reproducibility_MobileNet_SSD; TEST_P(Reproducibility_MobileNet_SSD, Accuracy) { const string proto = findDataFile("dnn/MobileNetSSD_deploy.prototxt", false); const string model = findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false); Net net = readNetFromCaffe(proto, model); - int targetId = GetParam(); - const float l1 = (targetId == DNN_TARGET_OPENCL_FP16) ? 1.5e-4 : 1e-5; - const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 4e-4 : 1e-4; + int backendId = get<0>(GetParam()); + int targetId = get<1>(GetParam()); - net.setPreferableBackend(DNN_BACKEND_OPENCV); + net.setPreferableBackend(backendId); net.setPreferableTarget(targetId); Mat sample = imread(_tf("street.png")); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); net.setInput(inp); - Mat out = net.forward(); + Mat out = net.forward().clone(); - const float scores_diff = (targetId == DNN_TARGET_OPENCL_FP16) ? 4e-4 : 1e-5; - const float boxes_iou_diff = (targetId == DNN_TARGET_OPENCL_FP16) ? 5e-3 : 1e-4; + const float scores_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.5e-2 : 1e-5; + const float boxes_iou_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 6.3e-2 : 1e-4; Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy")); - normAssertDetections(ref, out, "", 0.0, scores_diff, boxes_iou_diff); + normAssertDetections(ref, out, "", FLT_MIN, scores_diff, boxes_iou_diff); // Check that detections aren't preserved. inp.setTo(0.0f); net.setInput(inp); - out = net.forward(); - out = out.reshape(1, out.total() / 7); + Mat zerosOut = net.forward(); + zerosOut = zerosOut.reshape(1, zerosOut.total() / 7); - const int numDetections = out.rows; + const int numDetections = zerosOut.rows; ASSERT_NE(numDetections, 0); for (int i = 0; i < numDetections; ++i) { - float confidence = out.ptr(i)[2]; + float confidence = zerosOut.ptr(i)[2]; ASSERT_EQ(confidence, 0); } + // There is something wrong with Reshape layer in Myriad plugin and + // regression with DLIE/OCL_FP16 target. + if (backendId == DNN_BACKEND_INFERENCE_ENGINE) + { + if ((targetId == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_2) || + targetId == DNN_TARGET_OPENCL_FP16) + return; + } + // Check batching mode. - ref = ref.reshape(1, numDetections); inp = blobFromImages(std::vector(2, sample), 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); net.setInput(inp); Mat outBatch = net.forward(); // Output blob has a shape 1x1x2Nx7 where N is a number of detection for // a single sample in batch. The first numbers of detection vectors are batch id. - outBatch = outBatch.reshape(1, outBatch.total() / 7); - EXPECT_EQ(outBatch.rows, 2 * numDetections); - normAssert(outBatch.rowRange(0, numDetections), ref, "", l1, lInf); - normAssert(outBatch.rowRange(numDetections, 2 * numDetections).colRange(1, 7), ref.colRange(1, 7), - "", l1, lInf); + // For Inference Engine backend there is -1 delimiter which points the end of detections. + const int numRealDetections = ref.size[2]; + EXPECT_EQ(outBatch.size[2], 2 * numDetections); + out = out.reshape(1, numDetections).rowRange(0, numRealDetections); + outBatch = outBatch.reshape(1, 2 * numDetections); + for (int i = 0; i < 2; ++i) + { + Mat pred = outBatch.rowRange(i * numRealDetections, (i + 1) * numRealDetections); + EXPECT_EQ(countNonZero(pred.col(0) != i), 0); + normAssert(pred.colRange(1, 7), out.colRange(1, 7)); + } } -INSTANTIATE_TEST_CASE_P(/**/, Reproducibility_MobileNet_SSD, - Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16)); +INSTANTIATE_TEST_CASE_P(/**/, Reproducibility_MobileNet_SSD, dnnBackendsAndTargets()); typedef testing::TestWithParam Reproducibility_ResNet50; TEST_P(Reproducibility_ResNet50, Accuracy)