From 195aad8e6aadea02ba4c1daf4df301f34aeeb4a3 Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev
Date: Wed, 2 Aug 2023 14:28:47 +0300
Subject: [PATCH] Merge pull request #24069 from dkurt:openvino_detection_layer

DetectionOutput layer on OpenVINO without limitations #24069

### Pull Request Readiness Checklist

Required for https://github.com/opencv/opencv/pull/23987

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 .../dnn/src/layers/detection_output_layer.cpp | 29 ++++++++++++++++---
 modules/dnn/test/test_caffe_importer.cpp      | 29 ++++++++++++++-----
 modules/dnn/test/test_model.cpp               | 11 ++++---
 modules/dnn/test/test_tf_importer.cpp         |  5 ++++
 4 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp
index 61d4f44432..26f483a770 100644
--- a/modules/dnn/src/layers/detection_output_layer.cpp
+++ b/modules/dnn/src/layers/detection_output_layer.cpp
@@ -221,7 +221,7 @@ public:
     {
         return backendId == DNN_BACKEND_OPENCV ||
                (backendId == DNN_BACKEND_CUDA && !_groupByClasses) ||
-               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && !_locPredTransposed && _bboxesNormalized);
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -1006,9 +1006,30 @@ public:
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
         CV_Assert(nodes.size() == 3);
-        auto& box_logits  = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
-        auto& class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
-        auto& proposals   = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
+        auto box_logits  = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        auto class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
+        auto proposals   = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
+
+        if (_locPredTransposed) {
+            // Convert box predictions from yxYX to xyXY
+            box_logits = std::make_shared<ngraph::op::v1::Reshape>(box_logits,
+                std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{3}, std::vector<int32_t>{0, -1, 2}),
+                true
+            );
+            int axis = 2;
+            box_logits = std::make_shared<ngraph::op::v1::Reverse>(box_logits,
+                std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &axis),
+                ngraph::op::v1::Reverse::Mode::INDEX
+            );
+        }
+
+        auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{2}, std::vector<int32_t>{0, -1});
+        box_logits = std::make_shared<ngraph::op::v1::Reshape>(box_logits, shape, true);
+        class_preds = std::make_shared<ngraph::op::v1::Reshape>(class_preds, shape, true);
+        proposals = std::make_shared<ngraph::op::v1::Reshape>(proposals,
+            std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{3}, std::vector<int32_t>{0, _varianceEncodedInTarget ? 1 : 2, -1}),
+            true
+        );
 
         ngraph::op::DetectionOutputAttrs attrs;
         attrs.num_classes = _numClasses;
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
index 809b959a21..003592a68a 100644
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -731,7 +731,7 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
 
-    double scoreDiff = 0.0;
+    double scoreDiff = 0.0, iouDiff = 0.0;
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
     // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427:
     // While validating node 'v1::Reshape bbox_pred_reshape (bbox_pred[0]:f32{1,84}, Constant_265242[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape':
@@ -741,11 +741,20 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
     if (target == DNN_TARGET_OPENCL_FP16)
         scoreDiff = 0.02;
 #endif
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
+        iouDiff = 0.02;
+        if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) {
+            scoreDiff = 0.04;
+            iouDiff = 0.06;
+        }
+    }
+#endif
 
     static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
                                            0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
                                            0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166);
-    testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff);
+    testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff);
 }
 
 TEST_P(Test_Caffe_nets, FasterRCNN_zf)
@@ -766,9 +775,6 @@
     );
 #endif
 
-    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
-         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
     if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
          backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
@@ -779,7 +785,14 @@
     static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
                                            0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
                                            0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176);
-    testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref);
+
+    double scoreDiff = 0.0, iouDiff = 0.0;
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
+        scoreDiff = 0.02;
+        iouDiff = 0.13;
+    }
+
+    testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff);
 }
 
 TEST_P(Test_Caffe_nets, RFCN)
@@ -802,8 +815,8 @@
         iouDiff = 0.12;
     }
 
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
         scoreDiff = 0.1f;
         iouDiff = 0.2f;
diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp
index bd03551ab8..a19923bf28 100644
--- a/modules/dnn/test/test_model.cpp
+++ b/modules/dnn/test/test_model.cpp
@@ -447,14 +447,17 @@ TEST_P(Test_Model, DetectionOutput)
     {
         if (backend == DNN_BACKEND_OPENCV)
             scoreDiff = 4e-3;
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000)
-        else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-            scoreDiff = 4e-2;
-#endif
         else
             scoreDiff = 2e-2;
         iouDiff = 1.8e-1;
     }
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        scoreDiff = 0.05;
+        iouDiff = 0.08;
+    }
+#endif
 
     testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
                     scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean);
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index e2dfbc706e..42d98baced 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -1816,6 +1816,11 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN)
     double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.2 : 2e-5;
     double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.018 : default_lInf;
 
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        scoreDiff = std::max(scoreDiff, 0.02);
+        iouDiff = std::max(iouDiff, 0.009);
+    }
     normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff);
 
     // Output size of masks is NxCxHxW where
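
The nGraph subgraph added in detection_output_layer.cpp handles `_locPredTransposed` by viewing the flat box tensor as coordinate pairs (Reshape to `{0, -1, 2}`) and reversing each pair along axis 2, which turns yxYX ordering into xyXY. A minimal standalone C++ sketch of the same pair-wise reversal, not taken from the patch; the helper name is invented for illustration:

```cpp
// Illustration only: a box tensor stored as [y1, x1, Y1, X1, y2, x2, ...] is
// viewed as pairs and each pair is reversed, giving [x1, y1, X1, Y1, x2, y2, ...].
#include <cassert>
#include <utility>
#include <vector>

static std::vector<float> yxyxToXyxy(std::vector<float> boxes)  // hypothetical helper
{
    assert(boxes.size() % 2 == 0);
    for (size_t i = 0; i + 1 < boxes.size(); i += 2)
        std::swap(boxes[i], boxes[i + 1]);  // (y, x) -> (x, y) and (Y, X) -> (X, Y)
    return boxes;
}

int main()
{
    const std::vector<float> yxYX = {10.f, 20.f, 110.f, 220.f};  // one box in yxYX order
    const std::vector<float> xyXY = yxyxToXyxy(yxYX);
    assert(xyXY == (std::vector<float>{20.f, 10.f, 220.f, 110.f}));
    return 0;
}
```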
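With the backend restriction removed, a network whose DetectionOutput layer uses transposed location predictions or unnormalized boxes can stay on the OpenVINO backend instead of falling back to DNN_BACKEND_OPENCV. A minimal usage sketch, assuming an OpenVINO-enabled OpenCV build; the model and image paths below are placeholders, and the preprocessing parameters are typical SSD values rather than ones prescribed by this patch:

```cpp
// Minimal sketch: "model.prototxt", "model.caffemodel" and "input.jpg" are placeholders.
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    cv::dnn::Net net = cv::dnn::readNetFromCaffe("model.prototxt", "model.caffemodel");
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);  // OpenVINO (nGraph) backend
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);

    cv::Mat img = cv::imread("input.jpg");
    // Typical SSD-style preprocessing; adjust size and mean to the actual model.
    cv::Mat blob = cv::dnn::blobFromImage(img, 1.0, cv::Size(300, 300), cv::Scalar(104, 117, 123));
    net.setInput(blob);

    // DetectionOutput produces a 1x1xNx7 blob:
    // [imageId, classId, confidence, left, top, right, bottom] per detection.
    cv::Mat detections = net.forward();
    return 0;
}
```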