From 96a8e6d76c6351ba61a13d9f01ab7c9cd79f983f Mon Sep 17 00:00:00 2001 From: Gursimar Singh Date: Wed, 3 Jul 2024 16:33:12 +0530 Subject: [PATCH] Merge pull request #25756 from gursimarsingh:bug_fix/segmentation_sample [BUG FIX] Segmentation sample u2netp model results #25756 PR resolves #25753 related to incorrect output from u2netp model in segmentation sample ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- samples/dnn/models.yml | 2 +- samples/dnn/segmentation.cpp | 41 ++++++++++++++++++++---------------- samples/dnn/segmentation.py | 22 +++++++++---------- 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/samples/dnn/models.yml b/samples/dnn/models.yml index 5577c9ca4e..ae9a3f8dcd 100644 --- a/samples/dnn/models.yml +++ b/samples/dnn/models.yml @@ -275,4 +275,4 @@ u2netp: width: 320 height: 320 rgb: true - sample: "segmentation" \ No newline at end of file + sample: "segmentation" diff --git a/samples/dnn/segmentation.cpp b/samples/dnn/segmentation.cpp index 4534bfd97f..d017c15c68 100644 --- a/samples/dnn/segmentation.cpp +++ b/samples/dnn/segmentation.cpp @@ -79,7 +79,7 @@ int main(int argc, char **argv) // Open file with classes names. 
if (parser.has("classes")) { - string file = parser.get("classes"); + string file = findFile(parser.get("classes")); ifstream ifs(file.c_str()); if (!ifs.is_open()) CV_Error(Error::StsError, "File " + file + " not found"); @@ -92,7 +92,7 @@ int main(int argc, char **argv) // Open file with colors. if (parser.has("colors")) { - string file = parser.get("colors"); + string file = findFile(parser.get("colors")); ifstream ifs(file.c_str()); if (!ifs.is_open()) CV_Error(Error::StsError, "File " + file + " not found"); @@ -146,29 +146,34 @@ int main(int argc, char **argv) blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false); //! [Set input blob] net.setInput(blob); - //! [Make forward pass] - Mat score = net.forward(); + //! [Set input blob] + if (modelName == "u2netp") { - Mat mask, thresholded_mask, foreground_overlay, background_overlay, foreground_segmented; - mask = cv::Mat(score.size[2], score.size[3], CV_32F, score.ptr(0, 0)); - mask.convertTo(mask, CV_8U, 255); - threshold(mask, thresholded_mask, 0, 255, THRESH_BINARY + THRESH_OTSU); - resize(thresholded_mask, thresholded_mask, Size(frame.cols, frame.rows), 0, 0, INTER_AREA); + vector output; + net.forward(output, net.getUnconnectedOutLayersNames()); + + Mat pred = output[0].reshape(1, output[0].size[2]); + pred.convertTo(pred, CV_8U, 255.0); + Mat mask; + resize(pred, mask, Size(frame.cols, frame.rows), 0, 0, INTER_AREA); + // Create overlays for foreground and background - foreground_overlay = Mat::zeros(frame.size(), frame.type()); - background_overlay = Mat::zeros(frame.size(), frame.type()); - // Set foreground (object) to red and background to blue - foreground_overlay.setTo(Scalar(0, 0, 255), thresholded_mask); - Mat inverted_mask; - bitwise_not(thresholded_mask, inverted_mask); - background_overlay.setTo(Scalar(255, 0, 0), inverted_mask); + Mat foreground_overlay; + + // Set foreground (object) to red + Mat all_zeros = Mat::zeros(frame.size(), CV_8UC1); + vector channels = 
{all_zeros, all_zeros, mask}; + merge(channels, foreground_overlay); + // Blend the overlays with the original frame - addWeighted(frame, 1, foreground_overlay, 0.5, 0, foreground_segmented); - addWeighted(foreground_segmented, 1, background_overlay, 0.5, 0, frame); + addWeighted(frame, 0.25, foreground_overlay, 0.75, 0, frame); } else { + //! [Make forward pass] + Mat score = net.forward(); + //! [Make forward pass] Mat segm; colorizeSegmentation(score, segm); resize(segm, segm, frame.size(), 0, 0, INTER_NEAREST); diff --git a/samples/dnn/segmentation.py b/samples/dnn/segmentation.py index f46281f729..bca11bc1d8 100644 --- a/samples/dnn/segmentation.py +++ b/samples/dnn/segmentation.py @@ -75,14 +75,14 @@ def showLegend(classes): classes = None # Load a network -net = cv.dnn.readNet(args.model) +net = cv.dnn.readNetFromONNX(args.model) net.setPreferableBackend(args.backend) net.setPreferableTarget(args.target) winName = 'Deep learning semantic segmentation in OpenCV' cv.namedWindow(winName, cv.WINDOW_NORMAL) -cap = cv.VideoCapture(args.input if args.input else 0) +cap = cv.VideoCapture(cv.samples.findFile(args.input) if args.input else 0) legend = None while cv.waitKey(1) < 0: hasFrame, frame = cap.read() @@ -96,26 +96,24 @@ while cv.waitKey(1) < 0: # Create a 4D blob from a frame. 
inpWidth = args.width if args.width else frameWidth inpHeight = args.height if args.height else frameHeight - blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) + blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) net.setInput(blob) - score = net.forward() if args.alias == 'u2netp': - mask = score[0][0] - mask = mask.astype(np.uint8) - _, mask = cv.threshold(mask, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU) + output = net.forward(net.getUnconnectedOutLayersNames()) + pred = output[0][0, 0, :, :] + mask = (pred * 255).astype(np.uint8) mask = cv.resize(mask, (frame.shape[1], frame.shape[0]), interpolation=cv.INTER_AREA) # Create overlays for foreground and background foreground_overlay = np.zeros_like(frame, dtype=np.uint8) - background_overlay = np.zeros_like(frame, dtype=np.uint8) # Set foreground (object) to red and background to blue - foreground_overlay[mask == 255] = [0, 0, 255] # Red foreground - background_overlay[mask == 0] = [255, 0, 0] # Blue background + foreground_overlay[:, :, 2] = mask # Red foreground # Blend the overlays with the original frame - foreground_segmented = cv.addWeighted(frame, 1, foreground_overlay, 0.5, 0) - frame = cv.addWeighted(foreground_segmented, 1, background_overlay, 0.5, 0) + frame = cv.addWeighted(frame, 0.25, foreground_overlay, 0.75, 0) else: + score = net.forward() + numClasses = score.shape[1] height = score.shape[2] width = score.shape[3]