From 96a8e6d76c6351ba61a13d9f01ab7c9cd79f983f Mon Sep 17 00:00:00 2001 From: Gursimar Singh Date: Wed, 3 Jul 2024 16:33:12 +0530 Subject: [PATCH] Merge pull request #25756 from gursimarsingh:bug_fix/segmentation_sample [BUG FIX] Segmentation sample u2netp model results #25756 PR resolves #25753 related to incorrect output from u2netp model in segmentation sample ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- samples/dnn/models.yml | 2 +- samples/dnn/segmentation.cpp | 41 ++++++++++++++++++++---------------- samples/dnn/segmentation.py | 22 +++++++++---------- 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/samples/dnn/models.yml b/samples/dnn/models.yml index 5577c9ca4e..ae9a3f8dcd 100644 --- a/samples/dnn/models.yml +++ b/samples/dnn/models.yml @@ -275,4 +275,4 @@ u2netp: width: 320 height: 320 rgb: true - sample: "segmentation" \ No newline at end of file + sample: "segmentation" diff --git a/samples/dnn/segmentation.cpp b/samples/dnn/segmentation.cpp index 4534bfd97f..d017c15c68 100644 --- a/samples/dnn/segmentation.cpp +++ b/samples/dnn/segmentation.cpp @@ -79,7 +79,7 @@ int main(int argc, char **argv) // Open file with classes names. 
if (parser.has("classes")) { - string file = parser.get("classes"); + string file = findFile(parser.get("classes")); ifstream ifs(file.c_str()); if (!ifs.is_open()) CV_Error(Error::StsError, "File " + file + " not found"); @@ -92,7 +92,7 @@ int main(int argc, char **argv) // Open file with colors. if (parser.has("colors")) { - string file = parser.get("colors"); + string file = findFile(parser.get("colors")); ifstream ifs(file.c_str()); if (!ifs.is_open()) CV_Error(Error::StsError, "File " + file + " not found"); @@ -146,29 +146,34 @@ int main(int argc, char **argv) blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false); //! [Set input blob] net.setInput(blob); - //! [Make forward pass] - Mat score = net.forward(); + //! [Set input blob] + if (modelName == "u2netp") { - Mat mask, thresholded_mask, foreground_overlay, background_overlay, foreground_segmented; - mask = cv::Mat(score.size[2], score.size[3], CV_32F, score.ptr(0, 0)); - mask.convertTo(mask, CV_8U, 255); - threshold(mask, thresholded_mask, 0, 255, THRESH_BINARY + THRESH_OTSU); - resize(thresholded_mask, thresholded_mask, Size(frame.cols, frame.rows), 0, 0, INTER_AREA); + vector output; + net.forward(output, net.getUnconnectedOutLayersNames()); + + Mat pred = output[0].reshape(1, output[0].size[2]); + pred.convertTo(pred, CV_8U, 255.0); + Mat mask; + resize(pred, mask, Size(frame.cols, frame.rows), 0, 0, INTER_AREA); + // Create overlays for foreground and background - foreground_overlay = Mat::zeros(frame.size(), frame.type()); - background_overlay = Mat::zeros(frame.size(), frame.type()); - // Set foreground (object) to red and background to blue - foreground_overlay.setTo(Scalar(0, 0, 255), thresholded_mask); - Mat inverted_mask; - bitwise_not(thresholded_mask, inverted_mask); - background_overlay.setTo(Scalar(255, 0, 0), inverted_mask); + Mat foreground_overlay; + + // Set foreground (object) to red + Mat all_zeros = Mat::zeros(frame.size(), CV_8UC1); + vector channels = 
{all_zeros, all_zeros, mask}; + merge(channels, foreground_overlay); + // Blend the overlays with the original frame - addWeighted(frame, 1, foreground_overlay, 0.5, 0, foreground_segmented); - addWeighted(foreground_segmented, 1, background_overlay, 0.5, 0, frame); + addWeighted(frame, 0.25, foreground_overlay, 0.75, 0, frame); } else { + //! [Make forward pass] + Mat score = net.forward(); + //! [Make forward pass] Mat segm; colorizeSegmentation(score, segm); resize(segm, segm, frame.size(), 0, 0, INTER_NEAREST); diff --git a/samples/dnn/segmentation.py b/samples/dnn/segmentation.py index f46281f729..bca11bc1d8 100644 --- a/samples/dnn/segmentation.py +++ b/samples/dnn/segmentation.py @@ -75,14 +75,14 @@ def showLegend(classes): classes = None # Load a network -net = cv.dnn.readNet(args.model) +net = cv.dnn.readNetFromONNX(args.model) net.setPreferableBackend(args.backend) net.setPreferableTarget(args.target) winName = 'Deep learning semantic segmentation in OpenCV' cv.namedWindow(winName, cv.WINDOW_NORMAL) -cap = cv.VideoCapture(args.input if args.input else 0) +cap = cv.VideoCapture(cv.samples.findFile(args.input) if args.input else 0) legend = None while cv.waitKey(1) < 0: hasFrame, frame = cap.read() @@ -96,26 +96,24 @@ while cv.waitKey(1) < 0: # Create a 4D blob from a frame. 
inpWidth = args.width if args.width else frameWidth inpHeight = args.height if args.height else frameHeight - blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) + blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) net.setInput(blob) - score = net.forward() if args.alias == 'u2netp': - mask = score[0][0] - mask = mask.astype(np.uint8) - _, mask = cv.threshold(mask, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU) + output = net.forward(net.getUnconnectedOutLayersNames()) + pred = output[0][0, 0, :, :] + mask = (pred * 255).astype(np.uint8) mask = cv.resize(mask, (frame.shape[1], frame.shape[0]), interpolation=cv.INTER_AREA) # Create overlays for foreground and background foreground_overlay = np.zeros_like(frame, dtype=np.uint8) - background_overlay = np.zeros_like(frame, dtype=np.uint8) # Set foreground (object) to red and background to blue - foreground_overlay[mask == 255] = [0, 0, 255] # Red foreground - background_overlay[mask == 0] = [255, 0, 0] # Blue background + foreground_overlay[:, :, 2] = mask # Red foreground # Blend the overlays with the original frame - foreground_segmented = cv.addWeighted(frame, 1, foreground_overlay, 0.5, 0) - frame = cv.addWeighted(foreground_segmented, 1, background_overlay, 0.5, 0) + frame = cv.addWeighted(frame, 0.25, foreground_overlay, 0.75, 0) else: + score = net.forward() + numClasses = score.shape[1] height = score.shape[2] width = score.shape[3]