Introduced `BaseSolution` class for Ultralytics solutions (#16671)
Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>pull/16690/head
parent
e5d3427a52
commit
70ba988c68
6 changed files with 275 additions and 303 deletions
@ -0,0 +1,12 @@ |
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license |
||||
|
||||
# Configuration for Ultralytics Solutions |
||||
|
||||
model: "yolo11n.pt" # The Ultralytics YOLO11 model to be used (e.g., yolo11n.pt for YOLO11 nano version) |
||||
|
||||
region: # Object counting, queue or speed estimation region points |
||||
line_width: 2 # Thickness of the lines used to draw regions on the image/video frames |
||||
show: True # Flag to control whether to display output image or not |
||||
show_in: True # Flag to display objects moving *into* the defined region |
||||
show_out: True # Flag to display objects moving *out of* the defined region |
||||
classes: # Class indices to track and count (leave empty to use all classes) |
@ -1,243 +1,129 @@ |
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license |
||||
|
||||
from collections import defaultdict |
||||
from shapely.geometry import LineString, Point |
||||
|
||||
import cv2 |
||||
|
||||
from ultralytics.utils.checks import check_imshow, check_requirements |
||||
from ultralytics.solutions.solutions import BaseSolution # Import a parent class |
||||
from ultralytics.utils.plotting import Annotator, colors |
||||
|
||||
check_requirements("shapely>=2.0.0") |
||||
|
||||
from shapely.geometry import LineString, Point, Polygon |
||||
class ObjectCounter(BaseSolution): |
||||
"""A class to manage the counting of objects in a real-time video stream based on their tracks.""" |
||||
|
||||
def __init__(self, **kwargs): |
||||
"""Initialize the ObjectCounter class, a child class of BaseSolution, which can be used for counting |
||||
objects. |
||||
""" |
||||
super().__init__(**kwargs) |
||||
|
||||
class ObjectCounter: |
||||
"""A class to manage the counting of objects in a real-time video stream based on their tracks.""" |
||||
self.in_count = 0 # Counter for objects moving inward |
||||
self.out_count = 0 # Counter for objects moving outward |
||||
self.counted_ids = [] # List of IDs of objects that have been counted |
||||
self.classwise_counts = {} # Dictionary for counts, categorized by object class |
||||
|
||||
def __init__( |
||||
self, |
||||
names, |
||||
reg_pts=None, |
||||
line_thickness=2, |
||||
view_img=False, |
||||
view_in_counts=True, |
||||
view_out_counts=True, |
||||
draw_tracks=False, |
||||
): |
||||
self.initialize_region() # Setup region and counting areas |
||||
|
||||
self.show_in = self.CFG["show_in"] |
||||
self.show_out = self.CFG["show_out"] |
||||
|
||||
def count_objects(self, track_line, box, track_id, prev_position, cls): |
||||
""" |
||||
Initializes the ObjectCounter with various tracking and counting parameters. |
||||
Helper function to count objects within a polygonal region. |
||||
|
||||
Args: |
||||
names (dict): Dictionary of class names. |
||||
reg_pts (list): List of points defining the counting region. |
||||
line_thickness (int): Line thickness for bounding boxes. |
||||
view_img (bool): Flag to control whether to display the video stream. |
||||
view_in_counts (bool): Flag to control whether to display the in counts on the video stream. |
||||
view_out_counts (bool): Flag to control whether to display the out counts on the video stream. |
||||
draw_tracks (bool): Flag to control whether to draw the object tracks. |
||||
track_line (list): Track history of the object (center points of up to the last 30 frames) |
||||
box (list): Bounding box data for specific track in current frame |
||||
track_id (int): track ID of the object |
||||
prev_position (tuple): last frame position coordinates of the track |
||||
cls (int): Class index for classwise count updates |
||||
""" |
||||
# Mouse events |
||||
self.is_drawing = False |
||||
self.selected_point = None |
||||
|
||||
# Region & Line Information |
||||
self.reg_pts = [(20, 400), (1260, 400)] if reg_pts is None else reg_pts |
||||
self.counting_region = None |
||||
|
||||
# Image and annotation Information |
||||
self.im0 = None |
||||
self.tf = line_thickness |
||||
self.view_img = view_img |
||||
self.view_in_counts = view_in_counts |
||||
self.view_out_counts = view_out_counts |
||||
|
||||
self.names = names # Classes names |
||||
self.window_name = "Ultralytics YOLOv8 Object Counter" |
||||
|
||||
# Object counting Information |
||||
self.in_counts = 0 |
||||
self.out_counts = 0 |
||||
self.count_ids = [] |
||||
self.class_wise_count = {} |
||||
|
||||
# Tracks info |
||||
self.track_history = defaultdict(list) |
||||
self.draw_tracks = draw_tracks |
||||
|
||||
# Check if environment supports imshow |
||||
self.env_check = check_imshow(warn=True) |
||||
|
||||
# Initialize counting region |
||||
if len(self.reg_pts) == 2: |
||||
print("Line Counter Initiated.") |
||||
self.counting_region = LineString(self.reg_pts) |
||||
elif len(self.reg_pts) >= 3: |
||||
print("Polygon Counter Initiated.") |
||||
self.counting_region = Polygon(self.reg_pts) |
||||
else: |
||||
print("Invalid Region points provided, region_points must be 2 for lines or >= 3 for polygons.") |
||||
print("Using Line Counter Now") |
||||
self.counting_region = LineString(self.reg_pts) |
||||
|
||||
# Define the counting line segment |
||||
self.counting_line_segment = LineString( |
||||
[ |
||||
(self.reg_pts[0][0], self.reg_pts[0][1]), |
||||
(self.reg_pts[1][0], self.reg_pts[1][1]), |
||||
] |
||||
) |
||||
|
||||
def mouse_event_for_region(self, event, x, y, flags, params): |
||||
if prev_position is None or track_id in self.counted_ids: |
||||
return |
||||
|
||||
centroid = self.r_s.centroid |
||||
dx = (box[0] - prev_position[0]) * (centroid.x - prev_position[0]) |
||||
dy = (box[1] - prev_position[1]) * (centroid.y - prev_position[1]) |
||||
|
||||
if len(self.region) >= 3 and self.r_s.contains(Point(track_line[-1])): |
||||
self.counted_ids.append(track_id) |
||||
# For polygon region |
||||
if dx > 0: |
||||
self.in_count += 1 |
||||
self.classwise_counts[self.names[cls]]["IN"] += 1 |
||||
else: |
||||
self.out_count += 1 |
||||
self.classwise_counts[self.names[cls]]["OUT"] += 1 |
||||
|
||||
elif len(self.region) < 3 and LineString([prev_position, box[:2]]).intersects(self.l_s): |
||||
self.counted_ids.append(track_id) |
||||
# For linear region |
||||
if dx > 0 and dy > 0: |
||||
self.in_count += 1 |
||||
self.classwise_counts[self.names[cls]]["IN"] += 1 |
||||
else: |
||||
self.out_count += 1 |
||||
self.classwise_counts[self.names[cls]]["OUT"] += 1 |
||||
|
||||
def store_classwise_counts(self, cls): |
||||
""" |
||||
Handles mouse events for defining and moving the counting region in a real-time video stream. |
||||
Initialize class-wise counts if not already present. |
||||
|
||||
Args: |
||||
event (int): The type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN, etc.). |
||||
x (int): The x-coordinate of the mouse pointer. |
||||
y (int): The y-coordinate of the mouse pointer. |
||||
flags (int): Any associated event flags (e.g., cv2.EVENT_FLAG_CTRLKEY, cv2.EVENT_FLAG_SHIFTKEY, etc.). |
||||
params (dict): Additional parameters for the function. |
||||
cls (int): Class index for classwise count updates |
||||
""" |
||||
if event == cv2.EVENT_LBUTTONDOWN: |
||||
for i, point in enumerate(self.reg_pts): |
||||
if ( |
||||
isinstance(point, (tuple, list)) |
||||
and len(point) >= 2 |
||||
and (abs(x - point[0]) < 10 and abs(y - point[1]) < 10) |
||||
): |
||||
self.selected_point = i |
||||
self.is_drawing = True |
||||
break |
||||
|
||||
elif event == cv2.EVENT_MOUSEMOVE: |
||||
if self.is_drawing and self.selected_point is not None: |
||||
self.reg_pts[self.selected_point] = (x, y) |
||||
self.counting_region = Polygon(self.reg_pts) |
||||
|
||||
elif event == cv2.EVENT_LBUTTONUP: |
||||
self.is_drawing = False |
||||
self.selected_point = None |
||||
|
||||
def extract_and_process_tracks(self, tracks): |
||||
"""Extracts and processes tracks for object counting in a video stream.""" |
||||
# Annotator Init and region drawing |
||||
annotator = Annotator(self.im0, self.tf, self.names) |
||||
|
||||
# Draw region or line |
||||
annotator.draw_region(reg_pts=self.reg_pts, color=(104, 0, 123), thickness=self.tf * 2) |
||||
|
||||
# Extract tracks for OBB or object detection |
||||
track_data = tracks[0].obb or tracks[0].boxes |
||||
|
||||
if track_data and track_data.id is not None: |
||||
boxes = track_data.xyxy.cpu() |
||||
clss = track_data.cls.cpu().tolist() |
||||
track_ids = track_data.id.int().cpu().tolist() |
||||
|
||||
# Extract tracks |
||||
for box, track_id, cls in zip(boxes, track_ids, clss): |
||||
# Draw bounding box |
||||
annotator.box_label(box, label=self.names[cls], color=colors(int(track_id), True)) |
||||
|
||||
# Store class info |
||||
if self.names[cls] not in self.class_wise_count: |
||||
self.class_wise_count[self.names[cls]] = {"IN": 0, "OUT": 0} |
||||
|
||||
# Draw Tracks |
||||
track_line = self.track_history[track_id] |
||||
track_line.append((float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2))) |
||||
if len(track_line) > 30: |
||||
track_line.pop(0) |
||||
|
||||
# Draw track trails |
||||
if self.draw_tracks: |
||||
annotator.draw_centroid_and_tracks( |
||||
track_line, |
||||
color=colors(int(track_id), True), |
||||
track_thickness=self.tf, |
||||
) |
||||
if self.names[cls] not in self.classwise_counts: |
||||
self.classwise_counts[self.names[cls]] = {"IN": 0, "OUT": 0} |
||||
|
||||
prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None |
||||
def display_counts(self, im0): |
||||
""" |
||||
Helper function to display object counts on the frame. |
||||
|
||||
# Count objects in any polygon |
||||
if len(self.reg_pts) >= 3: |
||||
is_inside = self.counting_region.contains(Point(track_line[-1])) |
||||
|
||||
if prev_position is not None and is_inside and track_id not in self.count_ids: |
||||
self.count_ids.append(track_id) |
||||
|
||||
if (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) > 0: |
||||
self.in_counts += 1 |
||||
self.class_wise_count[self.names[cls]]["IN"] += 1 |
||||
else: |
||||
self.out_counts += 1 |
||||
self.class_wise_count[self.names[cls]]["OUT"] += 1 |
||||
|
||||
# Count objects using line |
||||
elif len(self.reg_pts) == 2: |
||||
if ( |
||||
prev_position is not None |
||||
and track_id not in self.count_ids |
||||
and LineString([(prev_position[0], prev_position[1]), (box[0], box[1])]).intersects( |
||||
self.counting_line_segment |
||||
) |
||||
): |
||||
self.count_ids.append(track_id) |
||||
|
||||
# Determine the direction of movement (IN or OUT) |
||||
dx = (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) |
||||
dy = (box[1] - prev_position[1]) * (self.counting_region.centroid.y - prev_position[1]) |
||||
if dx > 0 and dy > 0: |
||||
self.in_counts += 1 |
||||
self.class_wise_count[self.names[cls]]["IN"] += 1 |
||||
else: |
||||
self.out_counts += 1 |
||||
self.class_wise_count[self.names[cls]]["OUT"] += 1 |
||||
|
||||
labels_dict = {} |
||||
|
||||
for key, value in self.class_wise_count.items(): |
||||
if value["IN"] != 0 or value["OUT"] != 0: |
||||
if not self.view_in_counts and not self.view_out_counts: |
||||
continue |
||||
elif not self.view_in_counts: |
||||
labels_dict[str.capitalize(key)] = f"OUT {value['OUT']}" |
||||
elif not self.view_out_counts: |
||||
labels_dict[str.capitalize(key)] = f"IN {value['IN']}" |
||||
else: |
||||
labels_dict[str.capitalize(key)] = f"IN {value['IN']} OUT {value['OUT']}" |
||||
Args: |
||||
im0 (ndarray): The input image or frame |
||||
""" |
||||
labels_dict = { |
||||
str.capitalize(key): f"{'IN ' + str(value['IN']) if self.show_in else ''} " |
||||
f"{'OUT ' + str(value['OUT']) if self.show_out else ''}".strip() |
||||
for key, value in self.classwise_counts.items() |
||||
if value["IN"] != 0 or value["OUT"] != 0 |
||||
} |
||||
|
||||
if labels_dict: |
||||
annotator.display_analytics(self.im0, labels_dict, (104, 31, 17), (255, 255, 255), 10) |
||||
|
||||
def display_frames(self): |
||||
"""Displays the current frame with annotations and regions in a window.""" |
||||
if self.env_check: |
||||
cv2.namedWindow(self.window_name) |
||||
if len(self.reg_pts) == 4: # only add mouse event If user drawn region |
||||
cv2.setMouseCallback(self.window_name, self.mouse_event_for_region, {"region_points": self.reg_pts}) |
||||
cv2.imshow(self.window_name, self.im0) |
||||
# Break Window |
||||
if cv2.waitKey(1) & 0xFF == ord("q"): |
||||
return |
||||
|
||||
def start_counting(self, im0, tracks): |
||||
self.annotator.display_analytics(im0, labels_dict, (104, 31, 17), (255, 255, 255), 10) |
||||
|
||||
def count(self, im0): |
||||
""" |
||||
Main function to start the object counting process. |
||||
Processes input data (frames or object tracks) and updates counts. |
||||
|
||||
Args: |
||||
im0 (ndarray): Current frame from the video stream. |
||||
tracks (list): List of tracks obtained from the object tracking process. |
||||
im0 (ndarray): The input image that will be used for processing |
||||
Returns: |
||||
im0 (ndarray): The processed image for more usage |
||||
""" |
||||
self.im0 = im0 # store image |
||||
self.extract_and_process_tracks(tracks) # draw region even if no objects |
||||
|
||||
if self.view_img: |
||||
self.display_frames() |
||||
return self.im0 |
||||
self.annotator = Annotator(im0, line_width=self.line_width) # Initialize annotator |
||||
self.extract_tracks(im0) # Extract tracks |
||||
|
||||
self.annotator.draw_region( |
||||
reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2 |
||||
) # Draw region |
||||
|
||||
# Iterate over bounding boxes, track ids and classes index |
||||
if self.track_data is not None and self.track_data.id is not None: |
||||
for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss): |
||||
# Draw bounding box and counting region |
||||
self.annotator.box_label(box, label=self.names[cls], color=colors(track_id, True)) |
||||
self.store_tracking_history(track_id, box) # Store track history |
||||
self.store_classwise_counts(cls) # store classwise counts in dict |
||||
|
||||
# Draw centroid of objects |
||||
self.annotator.draw_centroid_and_tracks( |
||||
self.track_line, color=colors(int(track_id), True), track_thickness=self.line_width |
||||
) |
||||
|
||||
# store previous position of track for object counting |
||||
prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None |
||||
self.count_objects(self.track_line, box, track_id, prev_position, cls) # Perform object counting |
||||
|
||||
self.display_counts(im0) # Display the counts on the frame |
||||
self.display_output(im0) # display output with base class function |
||||
|
||||
if __name__ == "__main__": |
||||
classes_names = {0: "person", 1: "car"} # example class names |
||||
ObjectCounter(classes_names) |
||||
return im0 # return output image for more usage |
||||
|
@ -0,0 +1,88 @@ |
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license |
||||
|
||||
from collections import defaultdict |
||||
from pathlib import Path |
||||
|
||||
import cv2 |
||||
from shapely.geometry import LineString, Polygon |
||||
|
||||
from ultralytics import YOLO |
||||
from ultralytics.utils import yaml_load |
||||
from ultralytics.utils.checks import check_imshow |
||||
|
||||
DEFAULT_SOL_CFG_PATH = Path(__file__).resolve().parents[1] / "cfg/solutions/default.yaml" |
||||
|
||||
|
||||
class BaseSolution:
    """A class to manage all the Ultralytics Solutions: https://docs.ultralytics.com/solutions/."""

    def __init__(self, **kwargs):
        """
        Base initializer for all solutions.

        Child classes should call this with necessary parameters.

        Args:
            **kwargs: Overrides for the keys loaded from the default solutions config
                (the code below reads "region", "line_width", "model", "classes" and "show").
        """
        # Load config and update with args
        self.CFG = yaml_load(DEFAULT_SOL_CFG_PATH)
        self.CFG.update(kwargs)
        print("Ultralytics Solutions: ✅", self.CFG)

        self.region = self.CFG["region"]  # Store region data for other classes usage
        self.line_width = self.CFG["line_width"]  # Store line_width for usage

        # Load Model and store classes names
        self.model = YOLO(self.CFG["model"])
        self.names = self.model.names

        # Initialize environment and region setup
        self.env_check = check_imshow(warn=True)
        self.track_history = defaultdict(list)

    def extract_tracks(self, im0):
        """
        Apply object tracking and extract tracks.

        Stores the raw tracker output in `self.tracks` and `self.track_data`, and the per-frame
        `self.boxes`, `self.clss` and `self.track_ids`. The three per-frame attributes are reset to
        empty lists on every call, so a frame without tracked objects never exposes undefined
        attributes or stale values from the previous frame.

        Args:
            im0 (ndarray): The input image or frame
        """
        self.tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"])

        # Extract tracks for OBB or object detection
        self.track_data = self.tracks[0].obb or self.tracks[0].boxes

        # Reset per-frame results before (possibly) filling them
        self.boxes, self.clss, self.track_ids = [], [], []

        if self.track_data and self.track_data.id is not None:
            self.boxes = self.track_data.xyxy.cpu()
            self.clss = self.track_data.cls.cpu().tolist()
            self.track_ids = self.track_data.id.int().cpu().tolist()

    def store_tracking_history(self, track_id, box):
        """
        Store object tracking history.

        Appends the box center to the history of `track_id` and keeps only the 30 most recent
        points. The history list is also exposed as `self.track_line`.

        Args:
            track_id (int): The track ID of the object
            box (list): Bounding box coordinates of the object, indexed as (x1, y1, x2, y2)
        """
        # Store tracking history
        self.track_line = self.track_history[track_id]
        self.track_line.append(((box[0] + box[2]) / 2, (box[1] + box[3]) / 2))
        if len(self.track_line) > 30:
            self.track_line.pop(0)  # drop the oldest point; kept as a list so callers can index it

    def initialize_region(self):
        """
        Initialize the counting region and line segment based on config.

        Sets `self.r_s` (a Polygon for 3 or more points, otherwise a LineString) and `self.l_s`
        (a LineString through the first two region points). Falls back to a default two-point
        line when no region is configured.

        Raises:
            ValueError: If the region has fewer than 2 points.
        """
        if self.region is None:
            self.region = [(20, 400), (1260, 400)]

        # Both geometries below need at least two points; fail with a clear message instead of
        # an opaque indexing/geometry error.
        if len(self.region) < 2:
            raise ValueError(
                f"Invalid region {self.region}: provide 2 points for a line or >= 3 points for a polygon."
            )

        self.r_s = Polygon(self.region) if len(self.region) >= 3 else LineString(self.region)
        self.l_s = LineString([(self.region[0][0], self.region[0][1]), (self.region[1][0], self.region[1][1])])

    def display_output(self, im0):
        """
        Display the processed frame in a window when enabled.

        The frame is shown only if the "show" config flag is truthy and the environment check
        stored in `self.env_check` passed.

        Args:
            im0 (ndarray): The input image or frame
        """
        if self.CFG.get("show") and self.env_check:
            cv2.imshow("Ultralytics Solutions", im0)
            # waitKey lets the GUI process events so the window refreshes; the pressed key is ignored
            cv2.waitKey(1)
Loading…
Reference in new issue