From 195aad8e6aadea02ba4c1daf4df301f34aeeb4a3 Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev
Date: Wed, 2 Aug 2023 14:28:47 +0300
Subject: [PATCH] Merge pull request #24069 from dkurt:openvino_detection_layer

DetectionOutput layer on OpenVINO without limitations #24069

### Pull Request Readiness Checklist

Required for https://github.com/opencv/opencv/pull/23987

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 .../dnn/src/layers/detection_output_layer.cpp | 29 ++++++++++++++++---
 modules/dnn/test/test_caffe_importer.cpp      | 29 ++++++++++++++-----
 modules/dnn/test/test_model.cpp               | 11 ++++---
 modules/dnn/test/test_tf_importer.cpp         |  5 ++++
 4 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp
index 61d4f44432..26f483a770 100644
--- a/modules/dnn/src/layers/detection_output_layer.cpp
+++ b/modules/dnn/src/layers/detection_output_layer.cpp
@@ -221,7 +221,7 @@ public:
     {
         return backendId == DNN_BACKEND_OPENCV ||
                (backendId == DNN_BACKEND_CUDA && !_groupByClasses) ||
-               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && !_locPredTransposed && _bboxesNormalized);
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -1006,9 +1006,30 @@ public:
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
         CV_Assert(nodes.size() == 3);
-        auto& box_logits  = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
-        auto& class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
-        auto& proposals   = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
+        auto box_logits  = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        auto class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
+        auto proposals   = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
+
+        if (_locPredTransposed) {
+            // Convert box predictions from yxYX to xyXY
+            box_logits = std::make_shared<ngraph::op::v1::Reshape>(box_logits,
+                std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{3}, std::vector<int32_t>{0, -1, 2}),
+                true
+            );
+            int axis = 2;
+            box_logits = std::make_shared<ngraph::op::v1::Reverse>(box_logits,
+                std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &axis),
+                ngraph::op::v1::Reverse::Mode::INDEX
+            );
+        }
+
+        auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{2}, std::vector<int32_t>{0, -1});
+        box_logits = std::make_shared<ngraph::op::v1::Reshape>(box_logits, shape, true);
+        class_preds = std::make_shared<ngraph::op::v1::Reshape>(class_preds, shape, true);
+        proposals = std::make_shared<ngraph::op::v1::Reshape>(proposals,
+            std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{3}, std::vector<int32_t>{0, _varianceEncodedInTarget ? 1 : 2, -1}),
+            true
+        );
 
         ngraph::op::DetectionOutputAttrs attrs;
         attrs.num_classes = _numClasses;
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
index 809b959a21..003592a68a 100644
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -731,7 +731,7 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
 
-    double scoreDiff = 0.0;
+    double scoreDiff = 0.0, iouDiff = 0.0;
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
     // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427:
     // While validating node 'v1::Reshape bbox_pred_reshape (bbox_pred[0]:f32{1,84}, Constant_265242[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape':
@@ -741,11 +741,20 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
     if (target == DNN_TARGET_OPENCL_FP16)
         scoreDiff = 0.02;
 #endif
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
+        iouDiff = 0.02;
+        if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) {
+            scoreDiff = 0.04;
+            iouDiff = 0.06;
+        }
+    }
+#endif
 
     static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
                                            0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
                                            0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166);
-    testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff);
+    testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff);
 }
 
 TEST_P(Test_Caffe_nets, FasterRCNN_zf)
@@ -766,9 +775,6 @@
     );
 #endif
 
-    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
-         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16)
-        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
     if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
          backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
@@ -779,7 +785,14 @@
     static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
                                            0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
                                            0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176);
-    testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref);
+
+    double scoreDiff = 0.0, iouDiff = 0.0;
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
+        scoreDiff = 0.02;
+        iouDiff = 0.13;
+    }
+
+    testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff);
 }
 
 TEST_P(Test_Caffe_nets, RFCN)
@@ -802,8 +815,8 @@
         iouDiff = 0.12;
     }
 
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
         scoreDiff = 0.1f;
         iouDiff = 0.2f;
diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp
index bd03551ab8..a19923bf28 100644
--- a/modules/dnn/test/test_model.cpp
+++ b/modules/dnn/test/test_model.cpp
@@ -447,14 +447,17 @@ TEST_P(Test_Model, DetectionOutput)
     {
         if (backend == DNN_BACKEND_OPENCV)
             scoreDiff = 4e-3;
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000)
-        else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
-            scoreDiff = 4e-2;
-#endif
         else
             scoreDiff = 2e-2;
         iouDiff = 1.8e-1;
     }
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        scoreDiff = 0.05;
+        iouDiff = 0.08;
+    }
+#endif
 
     testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
                     scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean);
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index e2dfbc706e..42d98baced 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -1816,6 +1816,11 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN)
     double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.2 : 2e-5;
     double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.018 : default_lInf;
 
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        scoreDiff = std::max(scoreDiff, 0.02);
+        iouDiff = std::max(iouDiff, 0.009);
+    }
     normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff);
 
     // Output size of masks is NxCxHxW where
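
The nGraph subgraph added in detection_output_layer.cpp handles `_locPredTransposed` by viewing the flat box tensor as coordinate pairs (Reshape to `{0, -1, 2}`) and reversing each pair along axis 2, which turns yxYX ordering into xyXY. A minimal standalone C++ sketch of the same pair-wise reversal, not taken from the patch; the helper name is invented for illustration:

```cpp
// Illustration only: a box tensor stored as [y1, x1, Y1, X1, y2, x2, ...] is
// viewed as pairs and each pair is reversed, giving [x1, y1, X1, Y1, x2, y2, ...].
#include <cassert>
#include <utility>
#include <vector>

static std::vector<float> yxyxToXyxy(std::vector<float> boxes)  // hypothetical helper
{
    assert(boxes.size() % 2 == 0);
    for (size_t i = 0; i + 1 < boxes.size(); i += 2)
        std::swap(boxes[i], boxes[i + 1]);  // (y, x) -> (x, y) and (Y, X) -> (X, Y)
    return boxes;
}

int main()
{
    const std::vector<float> yxYX = {10.f, 20.f, 110.f, 220.f};  // one box in yxYX order
    const std::vector<float> xyXY = yxyxToXyxy(yxYX);
    assert(xyXY == (std::vector<float>{20.f, 10.f, 220.f, 110.f}));
    return 0;
}
```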
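With the backend restriction removed, a network whose DetectionOutput layer uses transposed location predictions or unnormalized boxes can stay on the OpenVINO backend instead of falling back to DNN_BACKEND_OPENCV. A minimal usage sketch, assuming an OpenVINO-enabled OpenCV build; the model and image paths below are placeholders, and the preprocessing parameters are typical SSD values rather than ones prescribed by this patch:

```cpp
// Minimal sketch: "model.prototxt", "model.caffemodel" and "input.jpg" are placeholders.
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    cv::dnn::Net net = cv::dnn::readNetFromCaffe("model.prototxt", "model.caffemodel");
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);  // OpenVINO (nGraph) backend
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);

    cv::Mat img = cv::imread("input.jpg");
    // Typical SSD-style preprocessing; adjust size and mean to the actual model.
    cv::Mat blob = cv::dnn::blobFromImage(img, 1.0, cv::Size(300, 300), cv::Scalar(104, 117, 123));
    net.setInput(blob);

    // DetectionOutput produces a 1x1xNx7 blob:
    // [imageId, classId, confidence, left, top, right, bottom] per detection.
    cv::Mat detections = net.forward();
    return 0;
}
```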