Merge pull request #24396 from Tsai-chia-hsiang:yolov8cv

Using cv2 dnn interface to run yolov8 model #24396

This is sample code that uses the OpenCV DNN interface to run the Ultralytics YOLOv8 model for object detection.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [X] I agree to contribute to the project under Apache 2 License.
- [X] To the best of my knowledge, the proposed patch is not based on code under the GPL or another license that is incompatible with OpenCV
- [X] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is an accuracy test, a performance test and test data in the opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
pull/24562/head
Chia-Hsiang Tsai 1 year ago committed by GitHub
parent 0e151e3c88
commit 83d70b0f36
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      samples/dnn/common.py
  2. 18
      samples/dnn/models.yml
  3. 36
      samples/dnn/object_detection.py

@ -79,6 +79,10 @@ def add_preproc_args(zoo, parser, sample):
help='Indicate that model works with RGB input images instead BGR ones.') help='Indicate that model works with RGB input images instead BGR ones.')
add_argument(zoo, parser, 'classes', add_argument(zoo, parser, 'classes',
help='Optional path to a text file with names of classes to label detected objects.') help='Optional path to a text file with names of classes to label detected objects.')
add_argument(zoo, parser, 'postprocessing', type=str,
help='Post-processing kind depends on model topology.')
add_argument(zoo, parser, 'background_label_id', type=int, default=-1,
help='An index of background class in predictions. If not negative, exclude such class from list of classes.')
def findFile(filename): def findFile(filename):

@ -33,6 +33,7 @@ yolov4:
height: 416 height: 416
rgb: true rgb: true
classes: "object_detection_classes_yolo.txt" classes: "object_detection_classes_yolo.txt"
background_label_id: 0
sample: "object_detection" sample: "object_detection"
yolov4-tiny: yolov4-tiny:
@ -47,6 +48,7 @@ yolov4-tiny:
height: 416 height: 416
rgb: true rgb: true
classes: "object_detection_classes_yolo.txt" classes: "object_detection_classes_yolo.txt"
background_label_id: 0
sample: "object_detection" sample: "object_detection"
yolov3: yolov3:
@ -61,6 +63,7 @@ yolov3:
height: 416 height: 416
rgb: true rgb: true
classes: "object_detection_classes_yolo.txt" classes: "object_detection_classes_yolo.txt"
background_label_id: 0
sample: "object_detection" sample: "object_detection"
tiny-yolo-voc: tiny-yolo-voc:
@ -75,6 +78,21 @@ tiny-yolo-voc:
height: 416 height: 416
rgb: true rgb: true
classes: "object_detection_classes_pascal_voc.txt" classes: "object_detection_classes_pascal_voc.txt"
background_label_id: 0
sample: "object_detection"
yolov8:
load_info:
url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8n.onnx"
sha1: "68f864475d06e2ec4037181052739f268eeac38d"
model: "yolov8n.onnx"
mean: [0, 0, 0]
scale: 0.00392
width: 640
height: 640
rgb: true
postprocessing: "yolov8"
classes: "object_detection_classes_yolo.txt"
sample: "object_detection" sample: "object_detection"
# Caffe implementation of SSD model from https://github.com/chuanqi305/MobileNet-SSD # Caffe implementation of SSD model from https://github.com/chuanqi305/MobileNet-SSD

@ -2,6 +2,7 @@ import cv2 as cv
import argparse import argparse
import numpy as np import numpy as np
import sys import sys
import copy
import time import time
from threading import Thread from threading import Thread
if sys.version_info[0] == 2: if sys.version_info[0] == 2:
@ -27,7 +28,7 @@ parser.add_argument('--out_tf_graph', default='graph.pbtxt',
help='For models from TensorFlow Object Detection API, you may ' help='For models from TensorFlow Object Detection API, you may '
'pass a .config file which was used for training through --config ' 'pass a .config file which was used for training through --config '
'argument. This way an additional .pbtxt file with TensorFlow graph will be created.') 'argument. This way an additional .pbtxt file with TensorFlow graph will be created.')
parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet', 'dldt'], parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet', 'dldt', 'onnx'],
help='Optional name of an origin framework of the model. ' help='Optional name of an origin framework of the model. '
'Detect it automatically if it does not set.') 'Detect it automatically if it does not set.')
parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold') parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
@ -86,7 +87,7 @@ if args.classes:
classes = f.read().rstrip('\n').split('\n') classes = f.read().rstrip('\n').split('\n')
# Load a network # Load a network
net = cv.dnn.readNet(cv.samples.findFile(args.model), cv.samples.findFile(args.config), args.framework) net = cv.dnn.readNet(args.model, args.config, args.framework)
net.setPreferableBackend(args.backend) net.setPreferableBackend(args.backend)
net.setPreferableTarget(args.target) net.setPreferableTarget(args.target)
outNames = net.getUnconnectedOutLayersNames() outNames = net.getUnconnectedOutLayersNames()
@ -145,20 +146,32 @@ def postprocess(frame, outs):
classIds.append(int(detection[1]) - 1) # Skip background label classIds.append(int(detection[1]) - 1) # Skip background label
confidences.append(float(confidence)) confidences.append(float(confidence))
boxes.append([left, top, width, height]) boxes.append([left, top, width, height])
elif lastLayer.type == 'Region': elif lastLayer.type == 'Region' or args.postprocessing == 'yolov8':
# Network produces output blob with a shape NxC where N is a number of # Network produces output blob with a shape NxC where N is a number of
# detected objects and C is a number of classes + 4 where the first 4 # detected objects and C is a number of classes + 4 where the first 4
# numbers are [center_x, center_y, width, height] # numbers are [center_x, center_y, width, height]
if args.postprocessing == 'yolov8':
box_scale_w = frameWidth / args.width
box_scale_h = frameHeight / args.height
else:
box_scale_w = frameWidth
box_scale_h = frameHeight
for out in outs: for out in outs:
if args.postprocessing == 'yolov8':
out = out[0].transpose(1, 0)
for detection in out: for detection in out:
scores = detection[5:] scores = detection[4:]
if args.background_label_id >= 0:
scores = np.delete(scores, args.background_label_id)
classId = np.argmax(scores) classId = np.argmax(scores)
confidence = scores[classId] confidence = scores[classId]
if confidence > confThreshold: if confidence > confThreshold:
center_x = int(detection[0] * frameWidth) center_x = int(detection[0] * box_scale_w)
center_y = int(detection[1] * frameHeight) center_y = int(detection[1] * box_scale_h)
width = int(detection[2] * frameWidth) width = int(detection[2] * box_scale_w)
height = int(detection[3] * frameHeight) height = int(detection[3] * box_scale_h)
left = int(center_x - width / 2) left = int(center_x - width / 2)
top = int(center_y - height / 2) top = int(center_y - height / 2)
classIds.append(classId) classIds.append(classId)
@ -170,7 +183,7 @@ def postprocess(frame, outs):
# NMS is used inside Region layer only on DNN_BACKEND_OPENCV for another backends we need NMS in sample # NMS is used inside Region layer only on DNN_BACKEND_OPENCV for another backends we need NMS in sample
# or NMS is required if number of outputs > 1 # or NMS is required if number of outputs > 1
if len(outNames) > 1 or lastLayer.type == 'Region' and args.backend != cv.dnn.DNN_BACKEND_OPENCV: if len(outNames) > 1 or (lastLayer.type == 'Region' or args.postprocessing == 'yolov8') and args.backend != cv.dnn.DNN_BACKEND_OPENCV:
indices = [] indices = []
classIds = np.array(classIds) classIds = np.array(classIds)
boxes = np.array(boxes) boxes = np.array(boxes)
@ -181,7 +194,6 @@ def postprocess(frame, outs):
conf = confidences[class_indices] conf = confidences[class_indices]
box = boxes[class_indices].tolist() box = boxes[class_indices].tolist()
nms_indices = cv.dnn.NMSBoxes(box, conf, confThreshold, nmsThreshold) nms_indices = cv.dnn.NMSBoxes(box, conf, confThreshold, nmsThreshold)
nms_indices = nms_indices[:, 0] if len(nms_indices) else []
indices.extend(class_indices[nms_indices]) indices.extend(class_indices[nms_indices])
else: else:
indices = np.arange(0, len(classIds)) indices = np.arange(0, len(classIds))
@ -282,11 +294,11 @@ def processingThreadBody():
futureOutputs.append(net.forwardAsync()) futureOutputs.append(net.forwardAsync())
else: else:
outs = net.forward(outNames) outs = net.forward(outNames)
predictionsQueue.put(np.copy(outs)) predictionsQueue.put(copy.deepcopy(outs))
while futureOutputs and futureOutputs[0].wait_for(0): while futureOutputs and futureOutputs[0].wait_for(0):
out = futureOutputs[0].get() out = futureOutputs[0].get()
predictionsQueue.put(np.copy([out])) predictionsQueue.put(copy.deepcopy([out]))
del futureOutputs[0] del futureOutputs[0]

Loading…
Cancel
Save