Add `docformatter` to pre-commit (#5279)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>
Branch: pull/5262/head
Glenn Jocher authored 1 year ago, committed via GitHub
parent c7aa83da31
commit 7517667a33
Files changed (90), showing the number of changed lines in each:
  1. .pre-commit-config.yaml (5)
  2. docs/build_reference.py (44)
  3. docs/reference/models/utils/ops.md (4)
  4. examples/YOLOv8-ONNXRuntime/main.py (9)
  5. examples/YOLOv8-OpenCV-ONNX-Python/main.py (41)
  6. examples/YOLOv8-Region-Counter/yolov8_region_counter.py (8)
  7. setup.cfg (9)
  8. setup.py (8)
  9. tests/conftest.py (9)
  10. tests/test_cli.py (11)
  11. tests/test_cuda.py (10)
  12. tests/test_engine.py (5)
  13. tests/test_python.py (72)
  14. ultralytics/cfg/__init__.py (9)
  15. ultralytics/data/augment.py (263)
  16. ultralytics/data/base.py (3)
  17. ultralytics/data/build.py (12)
  18. ultralytics/data/converter.py (7)
  19. ultralytics/data/dataset.py (18)
  20. ultralytics/data/loaders.py (118)
  21. ultralytics/data/utils.py (6)
  22. ultralytics/engine/exporter.py (4)
  23. ultralytics/engine/model.py (12)
  24. ultralytics/engine/predictor.py (16)
  25. ultralytics/engine/results.py (26)
  26. ultralytics/engine/trainer.py (50)
  27. ultralytics/engine/tuner.py (40)
  28. ultralytics/engine/validator.py (7)
  29. ultralytics/hub/__init__.py (4)
  30. ultralytics/hub/auth.py (21)
  31. ultralytics/hub/session.py (1)
  32. ultralytics/hub/utils.py (4)
  33. ultralytics/models/fastsam/model.py (3)
  34. ultralytics/models/fastsam/predict.py (2)
  35. ultralytics/models/fastsam/prompt.py (16)
  36. ultralytics/models/nas/model.py (4)
  37. ultralytics/models/rtdetr/model.py (10)
  38. ultralytics/models/rtdetr/predict.py (3)
  39. ultralytics/models/rtdetr/train.py (3)
  40. ultralytics/models/rtdetr/val.py (1)
  41. ultralytics/models/sam/amg.py (18)
  42. ultralytics/models/sam/model.py (11)
  43. ultralytics/models/sam/modules/decoders.py (6)
  44. ultralytics/models/sam/modules/encoders.py (27)
  45. ultralytics/models/sam/modules/tiny_encoder.py (40)
  46. ultralytics/models/sam/modules/transformer.py (6)
  47. ultralytics/models/sam/predict.py (12)
  48. ultralytics/models/utils/loss.py (43)
  49. ultralytics/models/utils/ops.py (32)
  50. ultralytics/models/yolo/classify/predict.py (1)
  51. ultralytics/models/yolo/classify/train.py (6)
  52. ultralytics/models/yolo/classify/val.py (1)
  53. ultralytics/models/yolo/detect/train.py (7)
  54. ultralytics/models/yolo/model.py (6)
  55. ultralytics/models/yolo/pose/predict.py (1)
  56. ultralytics/models/yolo/segment/predict.py (2)
  57. ultralytics/models/yolo/segment/val.py (2)
  58. ultralytics/nn/autobackend.py (77)
  59. ultralytics/nn/modules/__init__.py (22)
  60. ultralytics/nn/modules/block.py (16)
  61. ultralytics/nn/modules/conv.py (23)
  62. ultralytics/nn/modules/head.py (9)
  63. ultralytics/nn/modules/transformer.py (62)
  64. ultralytics/nn/modules/utils.py (10)
  65. ultralytics/nn/tasks.py (82)
  66. ultralytics/trackers/bot_sort.py (59)
  67. ultralytics/trackers/byte_tracker.py (86)
  68. ultralytics/trackers/track.py (1)
  69. ultralytics/trackers/utils/gmc.py (23)
  70. ultralytics/trackers/utils/kalman_filter.py (12)
  71. ultralytics/utils/__init__.py (49)
  72. ultralytics/utils/autobatch.py (4)
  73. ultralytics/utils/benchmarks.py (15)
  74. ultralytics/utils/callbacks/base.py (4)
  75. ultralytics/utils/callbacks/comet.py (14)
  76. ultralytics/utils/callbacks/dvc.py (9)
  77. ultralytics/utils/callbacks/neptune.py (11)
  78. ultralytics/utils/callbacks/wb.py (1)
  79. ultralytics/utils/checks.py (22)
  80. ultralytics/utils/dist.py (3)
  81. ultralytics/utils/downloads.py (10)
  82. ultralytics/utils/files.py (6)
  83. ultralytics/utils/instance.py (15)
  84. ultralytics/utils/loss.py (8)
  85. ultralytics/utils/metrics.py (68)
  86. ultralytics/utils/ops.py (19)
  87. ultralytics/utils/patches.py (16)
  88. ultralytics/utils/plotting.py (3)
  89. ultralytics/utils/torch_utils.py (14)
  90. ultralytics/utils/triton.py (3)

@ -62,6 +62,11 @@ repos:
args:
- --ignore-words-list=crate,nd,strack,dota,ane,segway,fo
- repo: https://github.com/PyCQA/docformatter
rev: v1.7.5
hooks:
- id: docformatter
# - repo: https://github.com/asottile/yesqa
# rev: v1.4.0
# hooks:

@ -18,7 +18,15 @@ CODE_DIR = ROOT
REFERENCE_DIR = ROOT.parent / 'docs/reference'
def extract_classes_and_functions(filepath):
def extract_classes_and_functions(filepath: Path):
"""Extracts class and function names from a given Python file.
Args:
filepath (Path): The path to the Python file.
Returns:
(tuple): A tuple containing lists of class and function names.
"""
with open(filepath, 'r') as file:
content = file.read()
@ -31,7 +39,15 @@ def extract_classes_and_functions(filepath):
return classes, functions
def create_markdown(py_filepath, module_path, classes, functions):
def create_markdown(py_filepath: Path, module_path: str, classes: list, functions: list):
"""Creates a Markdown file containing the API reference for the given Python module.
Args:
py_filepath (Path): The path to the Python file.
module_path (str): The import path for the Python module.
classes (list): A list of class names within the module.
functions (list): A list of function names within the module.
"""
md_filepath = py_filepath.with_suffix('.md')
# Read existing content and keep header content between first two ---
@ -64,17 +80,35 @@ def create_markdown(py_filepath, module_path, classes, functions):
def nested_dict():
"""Creates and returns a nested defaultdict.
Returns:
(defaultdict): A nested defaultdict object.
"""
return defaultdict(nested_dict)
def sort_nested_dict(d):
def sort_nested_dict(d: dict):
"""Sorts a nested dictionary recursively.
Args:
d (dict): The dictionary to sort.
Returns:
(dict): The sorted dictionary.
"""
return {
key: sort_nested_dict(value) if isinstance(value, dict) else value
for key, value in sorted(d.items())
}
def create_nav_menu_yaml(nav_items):
def create_nav_menu_yaml(nav_items: list):
"""Creates a YAML file for the navigation menu based on the provided list of items.
Args:
nav_items (list): A list of relative file paths to Markdown files for the navigation menu.
"""
nav_tree = nested_dict()
for item_str in nav_items:
@ -90,6 +124,7 @@ def create_nav_menu_yaml(nav_items):
nav_tree_sorted = sort_nested_dict(nav_tree)
def _dict_to_yaml(d, level=0):
"""Converts a nested dictionary to a YAML-formatted string with indentation."""
yaml_str = ''
indent = ' ' * level
for k, v in d.items():
@ -105,6 +140,7 @@ def create_nav_menu_yaml(nav_items):
def main():
"""Main function to extract class and function names, create Markdown files, and generate a YAML navigation menu."""
nav_items = []
for root, _, files in os.walk(CODE_DIR):
for file in files:
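The build_reference.py hunks above only show the new type hints and docstrings; the function bodies are truncated. For orientation, a minimal sketch of how `extract_classes_and_functions` could satisfy the documented contract (the regex patterns below are illustrative assumptions, not the file's actual implementation):

import re
from pathlib import Path


def extract_classes_and_functions(filepath: Path):
    """Sketch: pull top-level class and function names out of a Python source file."""
    content = Path(filepath).read_text()
    classes = re.findall(r'(?:^|\n)class\s(\w+)', content)  # top-level class names
    functions = re.findall(r'(?:^|\n)def\s(\w+)', content)  # top-level function names
    return classes, functions


# Example: classes, functions = extract_classes_and_functions(Path('ultralytics/utils/ops.py'))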

@ -16,7 +16,3 @@ keywords: Ultralytics, YOLO, HungarianMatcher, inverse_sigmoid, detection models
---
## ::: ultralytics.models.utils.ops.get_cdn_group
<br><br>
---
## ::: ultralytics.models.utils.ops.inverse_sigmoid
<br><br>

@ -9,11 +9,12 @@ from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_requirements, check_yaml
class Yolov8:
class YOLOv8:
"""YOLOv8 object detection model class for handling inference and visualization."""
def __init__(self, onnx_model, input_image, confidence_thres, iou_thres):
"""
Initializes an instance of the Yolov8 class.
Initializes an instance of the YOLOv8 class.
Args:
onnx_model: Path to the ONNX model.
@ -213,8 +214,8 @@ if __name__ == '__main__':
# Check the requirements and select the appropriate backend (CPU or GPU)
check_requirements('onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime')
# Create an instance of the Yolov8 class with the specified arguments
detection = Yolov8(args.model, args.img, args.conf_thres, args.iou_thres)
# Create an instance of the YOLOv8 class with the specified arguments
detection = YOLOv8(args.model, args.img, args.conf_thres, args.iou_thres)
# Perform object detection and obtain the output image
output_image = detection.main()

@ -7,11 +7,22 @@ from ultralytics.utils import ASSETS, yaml_load
from ultralytics.utils.checks import check_yaml
CLASSES = yaml_load(check_yaml('coco128.yaml'))['names']
colors = np.random.uniform(0, 255, size=(len(CLASSES), 3))
def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
"""
Draws bounding boxes on the input image based on the provided arguments.
Args:
img (numpy.ndarray): The input image to draw the bounding box on.
class_id (int): Class ID of the detected object.
confidence (float): Confidence score of the detected object.
x (int): X-coordinate of the top-left corner of the bounding box.
y (int): Y-coordinate of the top-left corner of the bounding box.
x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box.
y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box.
"""
label = f'{CLASSES[class_id]} ({confidence:.2f})'
color = colors[class_id]
cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
@ -19,18 +30,39 @@ def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
def main(onnx_model, input_image):
"""
Main function to load ONNX model, perform inference, draw bounding boxes, and display the output image.
Args:
onnx_model (str): Path to the ONNX model.
input_image (str): Path to the input image.
Returns:
list: List of dictionaries containing detection information such as class_id, class_name, confidence, etc.
"""
# Load the ONNX model
model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model)
# Read the input image
original_image: np.ndarray = cv2.imread(input_image)
[height, width, _] = original_image.shape
# Prepare a square image for inference
length = max((height, width))
image = np.zeros((length, length, 3), np.uint8)
image[0:height, 0:width] = original_image
# Calculate scale factor
scale = length / 640
# Preprocess the image and prepare blob for model
blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True)
model.setInput(blob)
# Perform inference
outputs = model.forward()
# Prepare output array
outputs = np.array([cv2.transpose(outputs[0])])
rows = outputs.shape[1]
@ -38,6 +70,7 @@ def main(onnx_model, input_image):
scores = []
class_ids = []
# Iterate through output to collect bounding boxes, confidence scores, and class IDs
for i in range(rows):
classes_scores = outputs[0][i][4:]
(minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores)
@ -49,9 +82,12 @@ def main(onnx_model, input_image):
scores.append(maxScore)
class_ids.append(maxClassIndex)
# Apply NMS (Non-maximum suppression)
result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5)
detections = []
# Iterate through NMS results to draw bounding boxes and labels
for i in range(len(result_boxes)):
index = result_boxes[i]
box = boxes[index]
@ -65,6 +101,7 @@ def main(onnx_model, input_image):
draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale),
round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale))
# Display the image with bounding boxes
cv2.imshow('image', original_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
@ -74,7 +111,7 @@ def main(onnx_model, input_image):
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--model', default='yolov8n.onnx', help='Input your onnx model.')
parser.add_argument('--model', default='yolov8n.onnx', help='Input your ONNX model.')
parser.add_argument('--img', default=str(ASSETS / 'bus.jpg'), help='Path to input image.')
args = parser.parse_args()
main(args.model, args.img)
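For reference, the return value documented for `main()` above (a list of detection dictionaries) can be consumed as in the sketch below; the model and image paths are placeholders, and any dictionary keys beyond those named in the docstring are assumptions.

detections = main('yolov8n.onnx', 'bus.jpg')  # placeholder paths
for det in detections:
    print(det['class_name'], det['confidence'])  # keys named in the docstring above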

@ -33,10 +33,6 @@ counting_regions = [
}, ]
def is_inside_polygon(point, polygon):
return polygon.contains(Point(point))
def mouse_callback(event, x, y, flags, param):
"""Mouse call back event."""
global current_region
@ -44,7 +40,7 @@ def mouse_callback(event, x, y, flags, param):
# Mouse left button down event
if event == cv2.EVENT_LBUTTONDOWN:
for region in counting_regions:
if is_inside_polygon((x, y), region['polygon']):
if region['polygon'].contains(Point((x, y))):
current_region = region
current_region['dragging'] = True
current_region['offset_x'] = x
@ -150,7 +146,7 @@ def run(
# Check if detection inside region
for region in counting_regions:
if is_inside_polygon((x, y), region['polygon']):
if region['polygon'].contains(Point((x, y))):
region['counts'] += 1
# Draw regions (Polygons/Rectangles)
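The change above drops the one-line `is_inside_polygon` helper in favour of calling Shapely's `Polygon.contains` directly. A standalone illustration of that call, with made-up coordinates:

from shapely.geometry import Point, Polygon

region = Polygon([(0, 0), (100, 0), (100, 100), (0, 100)])
print(region.contains(Point((50, 50))))   # True: point inside the region
print(region.contains(Point((150, 50))))  # False: point outside the region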

@ -60,3 +60,12 @@ SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET = True
SPLIT_BEFORE_CLOSING_BRACKET = False
SPLIT_BEFORE_FIRST_ARGUMENT = False
# EACH_DICT_ENTRY_ON_SEPARATE_LINE = False
[docformatter]
wrap-summaries = 120
wrap-descriptions = 120
in-place = true
make-summary-multi-line = false
pre-summary-newline = true
force-wrap = false
close-quotes-on-newline = true
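These settings match the docstring style applied throughout this commit. As a rough illustration of what the hook enforces with `pre-summary-newline = true` and 120-character wrapping (hand-written example mirroring the tests/conftest.py change shown later in this diff):

# Before docformatter
def pytest_addoption(parser):
    """Add custom command-line options to pytest.

    Args:
        parser (pytest.config.Parser): The pytest parser object.
    """

# After docformatter: the summary moves below an opening newline and lines wrap at 120 characters
def pytest_addoption(parser):
    """
    Add custom command-line options to pytest.

    Args:
        parser (pytest.config.Parser): The pytest parser object.
    """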

@ -12,6 +12,12 @@ README = (PARENT / 'README.md').read_text(encoding='utf-8')
def get_version():
"""
Retrieve the version number from the 'ultralytics/__init__.py' file.
Returns:
(str): The version number extracted from the '__version__' attribute in the 'ultralytics/__init__.py' file.
"""
file = PARENT / 'ultralytics/__init__.py'
return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', file.read_text(encoding='utf-8'), re.M)[1]
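The version regex shown above can be exercised in isolation (the version string below is a placeholder):

import re

text = "__version__ = '8.0.200'  # placeholder version string"
print(re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', text, re.M)[1])  # -> 8.0.200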
@ -24,7 +30,7 @@ def parse_requirements(file_path: Path):
file_path (str | Path): Path to the requirements.txt file.
Returns:
List[str]: List of parsed requirements.
(List[str]): List of parsed requirements.
"""
requirements = []

@ -9,7 +9,8 @@ TMP = Path(__file__).resolve().parent / 'tmp' # temp directory for test files
def pytest_addoption(parser):
"""Add custom command-line options to pytest.
"""
Add custom command-line options to pytest.
Args:
parser (pytest.config.Parser): The pytest parser object.
@ -18,7 +19,8 @@ def pytest_addoption(parser):
def pytest_configure(config):
"""Register custom markers to avoid pytest warnings.
"""
Register custom markers to avoid pytest warnings.
Args:
config (pytest.config.Config): The pytest config object.
@ -27,7 +29,8 @@ def pytest_configure(config):
def pytest_runtest_setup(item):
"""Setup hook to skip tests marked as slow if the --slow option is not provided.
"""
Setup hook to skip tests marked as slow if the --slow option is not provided.
Args:
item (pytest.Item): The test item object.

@ -22,11 +22,12 @@ EXPORT_ARGS = [
def run(cmd):
# Run a subprocess command with check=True
"""Execute a shell command using subprocess."""
subprocess.run(cmd.split(), check=True)
def test_special_modes():
"""Test various special command modes of YOLO."""
run('yolo help')
run('yolo checks')
run('yolo version')
@ -36,31 +37,37 @@ def test_special_modes():
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_train(task, model, data):
"""Test YOLO training for a given task, model, and data."""
run(f'yolo train {task} model={model}.yaml data={data} imgsz=32 epochs=1 cache=disk')
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_val(task, model, data):
"""Test YOLO validation for a given task, model, and data."""
run(f'yolo val {task} model={WEIGHTS_DIR / model}.pt data={data} imgsz=32 save_txt save_json')
@pytest.mark.parametrize('task,model,data', TASK_ARGS)
def test_predict(task, model, data):
"""Test YOLO prediction on sample assets for a given task and model."""
run(f'yolo predict model={WEIGHTS_DIR / model}.pt source={ASSETS} imgsz=32 save save_crop save_txt')
@pytest.mark.parametrize('model,format', EXPORT_ARGS)
def test_export(model, format):
"""Test exporting a YOLO model to different formats."""
run(f'yolo export model={WEIGHTS_DIR / model}.pt format={format} imgsz=32')
def test_rtdetr(task='detect', model='yolov8n-rtdetr.yaml', data='coco8.yaml'):
"""Test the RTDETR functionality with the Ultralytics framework."""
# Warning: MUST use imgsz=640
run(f'yolo train {task} model={model} data={data} --imgsz= 640 epochs =1, cache = disk') # add comma, spaces to args
run(f"yolo predict {task} model={model} source={ASSETS / 'bus.jpg'} imgsz=640 save save_crop save_txt")
def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8-seg.yaml'):
"""Test FastSAM segmentation functionality within Ultralytics."""
source = ASSETS / 'bus.jpg'
run(f'yolo segment val {task} model={model} data={data} imgsz=32')
@ -97,6 +104,7 @@ def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8
def test_mobilesam():
"""Test MobileSAM segmentation functionality using Ultralytics."""
from ultralytics import SAM
# Load the model
@ -121,5 +129,6 @@ def test_mobilesam():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
@pytest.mark.skipif(CUDA_DEVICE_COUNT < 2, reason='DDP is not available')
def test_train_gpu(task, model, data):
"""Test YOLO training on GPU(s) for various tasks and models."""
run(f'yolo train {task} model={model}.yaml data={data} imgsz=32 epochs=1 device=0') # single GPU
run(f'yolo train {task} model={model}.pt data={data} imgsz=32 epochs=1 device=0,1') # multi GPU

@ -1,4 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import contextlib
import pytest
@ -17,18 +18,21 @@ BUS = ASSETS / 'bus.jpg'
def test_checks():
"""Validate CUDA settings against torch CUDA functions."""
assert torch.cuda.is_available() == CUDA_IS_AVAILABLE
assert torch.cuda.device_count() == CUDA_DEVICE_COUNT
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_train():
"""Test model training on a minimal dataset."""
device = 0 if CUDA_DEVICE_COUNT == 1 else [0, 1]
YOLO(MODEL).train(data=DATA, imgsz=64, epochs=1, device=device) # requires imgsz>=64
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_predict_multiple_devices():
"""Validate model prediction on multiple devices."""
model = YOLO('yolov8n.pt')
model = model.cpu()
assert str(model.device) == 'cpu'
@ -53,6 +57,7 @@ def test_predict_multiple_devices():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_autobatch():
"""Check batch size for YOLO model using autobatch."""
from ultralytics.utils.autobatch import check_train_batch_size
check_train_batch_size(YOLO(MODEL).model.cuda(), imgsz=128, amp=True)
@ -60,6 +65,7 @@ def test_autobatch():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_utils_benchmarks():
"""Profile YOLO models for performance benchmarks."""
from ultralytics.utils.benchmarks import ProfileModels
# Pre-export a dynamic engine model to use dynamic inference
@ -69,6 +75,7 @@ def test_utils_benchmarks():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_predict_sam():
"""Test SAM model prediction with various prompts."""
from ultralytics import SAM
from ultralytics.models.sam import Predictor as SAMPredictor
@ -102,6 +109,7 @@ def test_predict_sam():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_model_ray_tune():
"""Tune YOLO model with Ray optimization library."""
with contextlib.suppress(RuntimeError): # RuntimeError may be caused by out-of-memory
YOLO('yolov8n-cls.yaml').tune(use_ray=True,
data='imagenet10',
@ -115,12 +123,14 @@ def test_model_ray_tune():
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_model_tune():
"""Tune YOLO model for performance."""
YOLO('yolov8n-pose.pt').tune(data='coco8-pose.yaml', plots=False, imgsz=32, epochs=1, iterations=2, device='cpu')
YOLO('yolov8n-cls.pt').tune(data='imagenet10', plots=False, imgsz=32, epochs=1, iterations=2, device='cpu')
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
def test_pycocotools():
"""Validate model predictions using pycocotools."""
from ultralytics.models.yolo.detect import DetectionValidator
from ultralytics.models.yolo.pose import PoseValidator
from ultralytics.models.yolo.segment import SegmentationValidator

@ -14,10 +14,12 @@ MODEL = WEIGHTS_DIR / 'yolov8n'
def test_func(*args): # noqa
"""Test function callback."""
print('callback test passed')
def test_export():
"""Test model exporting functionality."""
exporter = Exporter()
exporter.add_callback('on_export_start', test_func)
assert test_func in exporter.callbacks['on_export_start'], 'callback test failed'
@ -26,6 +28,7 @@ def test_export():
def test_detect():
"""Test object detection functionality."""
overrides = {'data': 'coco8.yaml', 'model': CFG_DET, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'coco8.yaml'
CFG.imgsz = 32
@ -61,6 +64,7 @@ def test_detect():
def test_segment():
"""Test image segmentation functionality."""
overrides = {'data': 'coco8-seg.yaml', 'model': CFG_SEG, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'coco8-seg.yaml'
CFG.imgsz = 32
@ -98,6 +102,7 @@ def test_segment():
def test_classify():
"""Test image classification functionality."""
overrides = {'data': 'imagenet10', 'model': CFG_CLS, 'imgsz': 32, 'epochs': 1, 'save': False}
CFG.data = 'imagenet10'
CFG.imgsz = 32

@ -27,11 +27,13 @@ IS_TMP_WRITEABLE = is_dir_writeable(TMP)
def test_model_forward():
"""Test the forward pass of the YOLO model."""
model = YOLO(CFG)
model(source=None, imgsz=32, augment=True) # also test no source and augment
def test_model_methods():
"""Test various methods and properties of the YOLO model."""
model = YOLO(MODEL)
# Model methods
@ -51,7 +53,7 @@ def test_model_methods():
def test_model_profile():
# Test profile=True model argument
"""Test profiling of the YOLO model with 'profile=True' argument."""
from ultralytics.nn.tasks import DetectionModel
model = DetectionModel() # build model
@ -61,7 +63,7 @@ def test_model_profile():
@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
def test_predict_txt():
# Write a list of sources (file, dir, glob, recursive glob) to a txt file
"""Test YOLO predictions with sources (file, dir, glob, recursive glob) specified in a text file."""
txt_file = TMP / 'sources.txt'
with open(txt_file, 'w') as f:
for x in [ASSETS / 'bus.jpg', ASSETS, ASSETS / '*', ASSETS / '**/*.jpg']:
@ -70,6 +72,7 @@ def test_predict_txt():
def test_predict_img():
"""Test YOLO prediction on various types of image sources."""
model = YOLO(MODEL)
seg_model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
cls_model = YOLO(WEIGHTS_DIR / 'yolov8n-cls.pt')
@ -105,7 +108,7 @@ def test_predict_img():
def test_predict_grey_and_4ch():
# Convert SOURCE to greyscale and 4-ch
"""Test YOLO prediction on SOURCE converted to greyscale and 4-channel images."""
im = Image.open(SOURCE)
directory = TMP / 'im4'
directory.mkdir(parents=True, exist_ok=True)
@ -132,8 +135,11 @@ def test_predict_grey_and_4ch():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable')
def test_track_stream():
# Test YouTube streaming inference (short 10 frame video) with non-default ByteTrack tracker
# imgsz=160 required for tracking for higher confidence and better matches
"""
Test YouTube streaming tracking (short 10 frame video) with non-default ByteTrack tracker.
Note: imgsz=160 is required for tracking, for higher confidence and better matches.
"""
import yaml
model = YOLO(MODEL)
@ -153,37 +159,44 @@ def test_track_stream():
def test_val():
"""Test the validation mode of the YOLO model."""
YOLO(MODEL).val(data='coco8.yaml', imgsz=32, save_hybrid=True)
def test_train_scratch():
"""Test training the YOLO model from scratch."""
model = YOLO(CFG)
model.train(data='coco8.yaml', epochs=2, imgsz=32, cache='disk', batch=-1, close_mosaic=1, name='model')
model(SOURCE)
def test_train_pretrained():
"""Test training the YOLO model from a pre-trained state."""
model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
model.train(data='coco8-seg.yaml', epochs=1, imgsz=32, cache='ram', copy_paste=0.5, mixup=0.5, name=0)
model(SOURCE)
def test_export_torchscript():
"""Test exporting the YOLO model to TorchScript format."""
f = YOLO(MODEL).export(format='torchscript', optimize=False)
YOLO(f)(SOURCE) # exported model inference
def test_export_onnx():
"""Test exporting the YOLO model to ONNX format."""
f = YOLO(MODEL).export(format='onnx', dynamic=True)
YOLO(f)(SOURCE) # exported model inference
def test_export_openvino():
"""Test exporting the YOLO model to OpenVINO format."""
f = YOLO(MODEL).export(format='openvino')
YOLO(f)(SOURCE) # exported model inference
def test_export_coreml():
"""Test exporting the YOLO model to CoreML format."""
if not WINDOWS: # RuntimeError: BlobWriter not loaded with coremltools 7.0 on windows
if MACOS:
f = YOLO(MODEL).export(format='coreml')
@ -193,7 +206,11 @@ def test_export_coreml():
def test_export_tflite(enabled=False):
# TF suffers from install conflicts on Windows and macOS
"""
Test exporting the YOLO model to TFLite format.
Note TF suffers from install conflicts on Windows and macOS.
"""
if enabled and LINUX:
model = YOLO(MODEL)
f = model.export(format='tflite')
@ -201,7 +218,11 @@ def test_export_tflite(enabled=False):
def test_export_pb(enabled=False):
# TF suffers from install conflicts on Windows and macOS
"""
Test exporting the YOLO model to *.pb format.
Note TF suffers from install conflicts on Windows and macOS.
"""
if enabled and LINUX:
model = YOLO(MODEL)
f = model.export(format='pb')
@ -209,18 +230,24 @@ def test_export_pb(enabled=False):
def test_export_paddle(enabled=False):
# Paddle protobuf requirements conflicting with onnx protobuf requirements
"""
Test exporting the YOLO model to Paddle format.
Note: Paddle protobuf requirements conflict with ONNX protobuf requirements.
"""
if enabled:
YOLO(MODEL).export(format='paddle')
@pytest.mark.slow
def test_export_ncnn():
"""Test exporting the YOLO model to NCNN format."""
f = YOLO(MODEL).export(format='ncnn')
YOLO(f)(SOURCE) # exported model inference
def test_all_model_yamls():
"""Test YOLO model creation for all available YAML configurations."""
for m in (ROOT / 'cfg' / 'models').rglob('*.yaml'):
if 'rtdetr' in m.name:
if TORCH_1_9: # torch<=1.8 issue - TypeError: __init__() got an unexpected keyword argument 'batch_first'
@ -230,6 +257,7 @@ def test_all_model_yamls():
def test_workflow():
"""Test the complete workflow including training, validation, prediction, and exporting."""
model = YOLO(MODEL)
model.train(data='coco8.yaml', epochs=1, imgsz=32, optimizer='SGD')
model.val(imgsz=32)
@ -238,12 +266,14 @@ def test_workflow():
def test_predict_callback_and_setup():
# Test callback addition for prediction
def on_predict_batch_end(predictor): # results -> List[batch_size]
"""Test callback functionality during YOLO prediction."""
def on_predict_batch_end(predictor):
"""Callback function that handles operations at the end of a prediction batch."""
path, im0s, _, _ = predictor.batch
im0s = im0s if isinstance(im0s, list) else [im0s]
bs = [predictor.dataset.bs for _ in range(len(path))]
predictor.results = zip(predictor.results, im0s, bs)
predictor.results = zip(predictor.results, im0s, bs) # results is List[batch_size]
model = YOLO(MODEL)
model.add_callback('on_predict_batch_end', on_predict_batch_end)
@ -259,6 +289,7 @@ def test_predict_callback_and_setup():
def test_results():
"""Test various result formats for the YOLO model."""
for m in 'yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt':
results = YOLO(WEIGHTS_DIR / m)([SOURCE, SOURCE], imgsz=160)
for r in results:
@ -274,7 +305,7 @@ def test_results():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_data_utils():
# Test functions in ultralytics/data/utils.py
"""Test utility functions in ultralytics/data/utils.py."""
from ultralytics.data.utils import HUBDatasetStats, autosplit
from ultralytics.utils.downloads import zip_directory
@ -294,7 +325,7 @@ def test_data_utils():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_data_converter():
# Test dataset converters
"""Test dataset converters."""
from ultralytics.data.converter import coco80_to_coco91_class, convert_coco
file = 'instances_val2017.json'
@ -304,6 +335,7 @@ def test_data_converter():
def test_data_annotator():
"""Test automatic data annotation."""
from ultralytics.data.annotator import auto_annotate
auto_annotate(ASSETS,
@ -313,7 +345,7 @@ def test_data_annotator():
def test_events():
# Test event sending
"""Test event sending functionality."""
from ultralytics.hub.utils import Events
events = Events()
@ -324,6 +356,7 @@ def test_events():
def test_cfg_init():
"""Test configuration initialization utilities."""
from ultralytics.cfg import check_dict_alignment, copy_default_cfg, smart_value
with contextlib.suppress(SyntaxError):
@ -334,6 +367,7 @@ def test_cfg_init():
def test_utils_init():
"""Test initialization utilities."""
from ultralytics.utils import get_git_branch, get_git_origin_url, get_ubuntu_version, is_github_actions_ci
get_ubuntu_version()
@ -343,6 +377,7 @@ def test_utils_init():
def test_utils_checks():
"""Test various utility checks."""
checks.check_yolov5u_filename('yolov5n.pt')
checks.git_describe(ROOT)
checks.check_requirements() # check requirements.txt
@ -354,12 +389,14 @@ def test_utils_checks():
def test_utils_benchmarks():
"""Test model benchmarking."""
from ultralytics.utils.benchmarks import ProfileModels
ProfileModels(['yolov8n.yaml'], imgsz=32, min_time=1, num_timed_runs=3, num_warmup_runs=1).profile()
def test_utils_torchutils():
"""Test Torch utility functions."""
from ultralytics.nn.modules.conv import Conv
from ultralytics.utils.torch_utils import get_flops_with_torch_profiler, profile, time_sync
@ -373,12 +410,14 @@ def test_utils_torchutils():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_utils_downloads():
"""Test file download utilities."""
from ultralytics.utils.downloads import get_google_drive_file_info
get_google_drive_file_info('https://drive.google.com/file/d/1cqT-cJgANNrhIHCrEufUYhQ4RqiWG_lJ/view?usp=drive_link')
def test_utils_ops():
"""Test various operations utilities."""
from ultralytics.utils.ops import (ltwh2xywh, ltwh2xyxy, make_divisible, xywh2ltwh, xywh2xyxy, xywhn2xyxy,
xywhr2xyxyxyxy, xyxy2ltwh, xyxy2xywh, xyxy2xywhn, xyxyxyxy2xywhr)
@ -396,6 +435,7 @@ def test_utils_ops():
def test_utils_files():
"""Test file handling utilities."""
from ultralytics.utils.files import file_age, file_date, get_latest_run, spaces_in_path
file_age(SOURCE)
@ -409,6 +449,7 @@ def test_utils_files():
def test_nn_modules_conv():
"""Test Convolutional Neural Network modules."""
from ultralytics.nn.modules.conv import CBAM, Conv2, ConvTranspose, DWConvTranspose2d, Focus
c1, c2 = 8, 16 # input and output channels
@ -427,6 +468,7 @@ def test_nn_modules_conv():
def test_nn_modules_block():
"""Test Neural Network block modules."""
from ultralytics.nn.modules.block import C1, C3TR, BottleneckCSP, C3Ghost, C3x
c1, c2 = 8, 16 # input and output channels
@ -442,6 +484,7 @@ def test_nn_modules_block():
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_hub():
"""Test Ultralytics HUB functionalities."""
from ultralytics.hub import export_fmts_hub, logout
from ultralytics.hub.utils import smart_request
@ -453,6 +496,7 @@ def test_hub():
@pytest.mark.slow
@pytest.mark.skipif(not ONLINE, reason='environment is offline')
def test_triton():
"""Test NVIDIA Triton Server functionalities."""
checks.check_requirements('tritonclient[all]')
import subprocess
import time

@ -180,8 +180,8 @@ def _handle_deprecation(custom):
def check_dict_alignment(base: Dict, custom: Dict, e=None):
"""
This function checks for any mismatched keys between a custom configuration list and a base configuration list.
If any mismatched keys are found, the function prints out similar keys from the base list and exits the program.
This function checks for any mismatched keys between a custom configuration list and a base configuration list. If
any mismatched keys are found, the function prints out similar keys from the base list and exits the program.
Args:
custom (dict): a dictionary of custom configuration options
@ -205,9 +205,8 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None):
def merge_equals_args(args: List[str]) -> List[str]:
"""
Merges arguments around isolated '=' args in a list of strings.
The function considers cases where the first argument ends with '=' or the second starts with '=',
as well as when the middle one is an equals sign.
Merges arguments around isolated '=' args in a list of strings. The function considers cases where the first
argument ends with '=' or the second starts with '=', as well as when the middle one is an equals sign.
Args:
args (List[str]): A list of strings where each element is an argument.

@ -20,16 +20,30 @@ from .utils import polygons2masks, polygons2masks_overlap
# TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
class BaseTransform:
"""
Base class for image transformations.
This is a generic transformation class that can be extended for specific image processing needs.
The class is designed to be compatible with both classification and semantic segmentation tasks.
Methods:
__init__: Initializes the BaseTransform object.
apply_image: Applies image transformation to labels.
apply_instances: Applies transformations to object instances in labels.
apply_semantic: Applies semantic segmentation to an image.
__call__: Applies all label transformations to an image, instances, and semantic masks.
"""
def __init__(self) -> None:
"""Initializes the BaseTransform object."""
pass
def apply_image(self, labels):
"""Applies image transformation to labels."""
"""Applies image transformations to labels."""
pass
def apply_instances(self, labels):
"""Applies transformations to input 'labels' and returns object instances."""
"""Applies transformations to object instances in labels."""
pass
def apply_semantic(self, labels):
@ -37,13 +51,14 @@ class BaseTransform:
pass
def __call__(self, labels):
"""Applies label transformations to an image, instances and semantic masks."""
"""Applies all label transformations to an image, instances, and semantic masks."""
self.apply_image(labels)
self.apply_instances(labels)
self.apply_semantic(labels)
class Compose:
"""Class for composing multiple image transformations."""
def __init__(self, transforms):
"""Initializes the Compose object with a list of transforms."""
@ -60,18 +75,23 @@ class Compose:
self.transforms.append(transform)
def tolist(self):
"""Converts list of transforms to a standard Python list."""
"""Converts the list of transforms to a standard Python list."""
return self.transforms
def __repr__(self):
"""Return string representation of object."""
"""Returns a string representation of the object."""
return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})"
class BaseMixTransform:
"""This implementation is from mmyolo."""
"""
Class for base mix (MixUp/Mosaic) transformations.
This implementation is from mmyolo.
"""
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
"""Initializes the BaseMixTransform object with dataset, pre_transform, and probability."""
self.dataset = dataset
self.pre_transform = pre_transform
self.p = p
@ -262,8 +282,10 @@ class Mosaic(BaseMixTransform):
class MixUp(BaseMixTransform):
"""Class for applying MixUp augmentation to the dataset."""
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
"""Initializes MixUp object with dataset, pre_transform, and probability of applying MixUp."""
super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
def get_indexes(self):
@ -271,7 +293,7 @@ class MixUp(BaseMixTransform):
return random.randint(0, len(self.dataset) - 1)
def _mix_transform(self, labels):
"""Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf."""
"""Applies MixUp augmentation as per https://arxiv.org/pdf/1710.09412.pdf."""
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
labels2 = labels['mix_labels'][0]
labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8)
@ -281,6 +303,28 @@ class MixUp(BaseMixTransform):
class RandomPerspective:
"""
Implements random perspective and affine transformations on images and corresponding bounding boxes, segments, and
keypoints. These transformations include rotation, translation, scaling, and shearing. The class also offers the
option to apply these transformations conditionally with a specified probability.
Attributes:
degrees (float): Degree range for random rotations.
translate (float): Fraction of total width and height for random translation.
scale (float): Scaling factor interval, e.g., a scale factor of 0.1 allows a resize between 90%-110%.
shear (float): Shear intensity (angle in degrees).
perspective (float): Perspective distortion factor.
border (tuple): Tuple specifying mosaic border.
pre_transform (callable): A function/transform to apply to the image before starting the random transformation.
Methods:
affine_transform(img, border): Applies a series of affine transformations to the image.
apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix.
apply_segments(segments, M): Transforms segments and generates new bounding boxes.
apply_keypoints(keypoints, M): Transforms keypoints.
__call__(labels): Main method to apply transformations to both images and their corresponding annotations.
box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation.
"""
def __init__(self,
degrees=0.0,
@ -290,17 +334,31 @@ class RandomPerspective:
perspective=0.0,
border=(0, 0),
pre_transform=None):
"""Initializes RandomPerspective object with transformation parameters."""
self.degrees = degrees
self.translate = translate
self.scale = scale
self.shear = shear
self.perspective = perspective
# Mosaic border
self.border = border
self.border = border # mosaic border
self.pre_transform = pre_transform
def affine_transform(self, img, border):
"""Center."""
"""
Applies a sequence of affine transformations centered around the image center.
Args:
img (ndarray): Input image.
border (tuple): Border dimensions.
Returns:
img (ndarray): Transformed image.
M (ndarray): Transformation matrix.
s (float): Scale factor.
"""
# Center
C = np.eye(3, dtype=np.float32)
C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
@ -462,8 +520,22 @@ class RandomPerspective:
labels['resized_shape'] = img.shape[:2]
return labels
def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
# Compute box candidates: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
"""
Compute box candidates based on a set of thresholds. This method compares the characteristics of the boxes
before and after augmentation to decide whether a box is a candidate for further processing.
Args:
box1 (numpy.ndarray): The 4,n bounding box before augmentation, represented as [x1, y1, x2, y2].
box2 (numpy.ndarray): The 4,n bounding box after augmentation, represented as [x1, y1, x2, y2].
wh_thr (float, optional): The width and height threshold in pixels. Default is 2.
ar_thr (float, optional): The aspect ratio threshold. Default is 100.
area_thr (float, optional): The area ratio threshold. Default is 0.1.
eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16.
Returns:
(numpy.ndarray): A boolean array indicating which boxes are candidates based on the given thresholds.
"""
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
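The hunk is cut off before the return statement; a plausible completion of the documented criterion (width/height, area-ratio, and aspect-ratio thresholds) is sketched below. This is an assumption for illustration, not the file's actual code.

import numpy as np

def box_candidates_sketch(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
    """Sketch of the documented filter: boxes must remain wide/tall enough, keep enough area, and not become too elongated."""
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)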
@ -471,14 +543,32 @@ class RandomPerspective:
class RandomHSV:
"""
This class is responsible for performing random adjustments to the Hue, Saturation, and Value (HSV) channels of an
image.
The adjustments are random but within limits set by hgain, sgain, and vgain.
"""
def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
"""
Initialize RandomHSV class with gains for each HSV channel.
Args:
hgain (float, optional): Maximum variation for hue. Default is 0.5.
sgain (float, optional): Maximum variation for saturation. Default is 0.5.
vgain (float, optional): Maximum variation for value. Default is 0.5.
"""
self.hgain = hgain
self.sgain = sgain
self.vgain = vgain
def __call__(self, labels):
"""Applies image HSV augmentation"""
"""
Applies random HSV augmentation to an image within the predefined limits.
The modified image replaces the original image in the input 'labels' dict.
"""
img = labels['img']
if self.hgain or self.sgain or self.vgain:
r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains
@ -496,9 +586,22 @@ class RandomHSV:
class RandomFlip:
"""Applies random horizontal or vertical flip to an image with a given probability."""
"""
Applies a random horizontal or vertical flip to an image with a given probability.
Also updates any instances (bounding boxes, keypoints, etc.) accordingly.
"""
def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
"""
Initializes the RandomFlip class with probability and direction.
Args:
p (float, optional): The probability of applying the flip. Must be between 0 and 1. Default is 0.5.
direction (str, optional): The direction to apply the flip. Must be 'horizontal' or 'vertical'.
Default is 'horizontal'.
flip_idx (array-like, optional): Index mapping for flipping keypoints, if any.
"""
assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
assert 0 <= p <= 1.0
@ -507,7 +610,16 @@ class RandomFlip:
self.flip_idx = flip_idx
def __call__(self, labels):
"""Resize image and padding for detection, instance segmentation, pose."""
"""
Applies random flip to an image and updates any instances like bounding boxes or keypoints accordingly.
Args:
labels (dict): A dictionary containing the keys 'img' and 'instances'. 'img' is the image to be flipped.
'instances' is an object containing bounding boxes and optionally keypoints.
Returns:
(dict): The same dict with the flipped image and updated instances under the 'img' and 'instances' keys.
"""
img = labels['img']
instances = labels.pop('instances')
instances.convert_bbox(format='xywh')
@ -599,12 +711,38 @@ class LetterBox:
class CopyPaste:
"""
Implements the Copy-Paste augmentation as described in the paper https://arxiv.org/abs/2012.07177. This class is
responsible for applying the Copy-Paste augmentation on images and their corresponding instances.
"""
def __init__(self, p=0.5) -> None:
"""
Initializes the CopyPaste class with a given probability.
Args:
p (float, optional): The probability of applying the Copy-Paste augmentation. Must be between 0 and 1.
Default is 0.5.
"""
self.p = p
def __call__(self, labels):
"""Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)."""
"""
Applies the Copy-Paste augmentation to the given image and instances.
Args:
labels (dict): A dictionary containing:
- 'img': The image to augment.
- 'cls': Class labels associated with the instances.
- 'instances': Object containing bounding boxes, and optionally, keypoints and segments.
Returns:
(dict): Dict with augmented image and updated instances under the 'img', 'cls', and 'instances' keys.
Notes:
1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work.
2. This method modifies the input dictionary 'labels' in place.
"""
im = labels['img']
cls = labels['cls']
h, w = im.shape[:2]
@ -639,9 +777,13 @@ class CopyPaste:
class Albumentations:
"""Albumentations transformations. Optional, uninstall package to disable.
Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive Histogram Equalization,
random change of brightness and contrast, RandomGamma and lowering of image quality by compression."""
"""
Albumentations transformations.
Optional, uninstall package to disable. Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive
Histogram Equalization, random change of brightness and contrast, RandomGamma and lowering of image quality by
compression.
"""
def __init__(self, p=1.0):
"""Initialize the transform object for YOLO bbox formatted params."""
@ -690,6 +832,19 @@ class Albumentations:
# TODO: technically this is not an augmentation, maybe we should put this to another files
class Format:
"""
Formats image annotations for object detection, instance segmentation, and pose estimation tasks. The class
standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader.
Attributes:
bbox_format (str): Format for bounding boxes. Default is 'xywh'.
normalize (bool): Whether to normalize bounding boxes. Default is True.
return_mask (bool): Return instance masks for segmentation. Default is False.
return_keypoint (bool): Return keypoints for pose estimation. Default is False.
mask_ratio (int): Downsample ratio for masks. Default is 4.
mask_overlap (bool): Whether to overlap masks. Default is True.
batch_idx (bool): Keep batch indexes. Default is True.
"""
def __init__(self,
bbox_format='xywh',
@ -699,6 +854,7 @@ class Format:
mask_ratio=4,
mask_overlap=True,
batch_idx=True):
"""Initializes the Format class with given parameters."""
self.bbox_format = bbox_format
self.normalize = normalize
self.return_mask = return_mask # set False when training detection only
@ -746,7 +902,7 @@ class Format:
return img
def _format_segments(self, instances, cls, w, h):
"""convert polygon points to bitmap."""
"""Convert polygon points to bitmap."""
segments = instances.segments
if self.mask_overlap:
masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio)
@ -851,35 +1007,75 @@ def classify_albumentations(
class ClassifyLetterBox:
"""YOLOv8 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])"""
"""
YOLOv8 LetterBox class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
T.Compose([LetterBox(size), ToTensor()]).
Attributes:
h (int): Target height of the image.
w (int): Target width of the image.
auto (bool): If True, automatically solves for short side using stride.
stride (int): The stride value, used when 'auto' is True.
"""
def __init__(self, size=(640, 640), auto=False, stride=32):
"""Resizes image and crops it to center with max dimensions 'h' and 'w'."""
"""
Initializes the ClassifyLetterBox class with a target size, auto-flag, and stride.
Args:
size (Union[int, Tuple[int, int]]): The target dimensions (height, width) for the letterbox.
auto (bool): If True, automatically calculates the short side based on stride.
stride (int): The stride value, used when 'auto' is True.
"""
super().__init__()
self.h, self.w = (size, size) if isinstance(size, int) else size
self.auto = auto # pass max size integer, automatically solve for short side using stride
self.stride = stride # used with auto
def __call__(self, im): # im = np.array HWC
def __call__(self, im):
"""
Resizes the image and pads it with a letterbox method.
Args:
im (numpy.ndarray): The input image as a numpy array of shape HWC.
Returns:
(numpy.ndarray): The letterboxed and resized image as a numpy array.
"""
imh, imw = im.shape[:2]
r = min(self.h / imh, self.w / imw) # ratio of new/old
h, w = round(imh * r), round(imw * r) # resized image
r = min(self.h / imh, self.w / imw) # ratio of new/old dimensions
h, w = round(imh * r), round(imw * r) # resized image dimensions
# Calculate padding dimensions
hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
# Create padded image
im_out = np.full((hs, ws, 3), 114, dtype=im.dtype)
im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
return im_out
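A quick usage check of the behaviour documented above, assuming `ClassifyLetterBox` is imported from ultralytics.data.augment (input size chosen arbitrarily):

import numpy as np
from ultralytics.data.augment import ClassifyLetterBox

letterbox = ClassifyLetterBox(size=640)       # auto=False, so the output is exactly 640x640
im = np.zeros((480, 320, 3), dtype=np.uint8)  # arbitrary HWC input
out = letterbox(im)
print(out.shape)  # (640, 640, 3), with grey (114) padding around the resized image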
class CenterCrop:
"""YOLOv8 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])"""
"""YOLOv8 CenterCrop class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
T.Compose([CenterCrop(size), ToTensor()]).
"""
def __init__(self, size=640):
"""Converts an image from numpy array to PyTorch tensor."""
super().__init__()
self.h, self.w = (size, size) if isinstance(size, int) else size
def __call__(self, im): # im = np.array HWC
def __call__(self, im):
"""
Resizes and crops the center of the image using a letterbox method.
Args:
im (numpy.ndarray): The input image as a numpy array of shape HWC.
Returns:
(numpy.ndarray): The center-cropped and resized image as a numpy array.
"""
imh, imw = im.shape[:2]
m = min(imh, imw) # min dimension
top, left = (imh - m) // 2, (imw - m) // 2
@ -887,14 +1083,23 @@ class CenterCrop:
class ToTensor:
"""YOLOv8 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])."""
"""YOLOv8 ToTensor class for image preprocessing, i.e., T.Compose([LetterBox(size), ToTensor()])."""
def __init__(self, half=False):
"""Initialize YOLOv8 ToTensor object with optional half-precision support."""
super().__init__()
self.half = half
def __call__(self, im): # im = np.array HWC in BGR order
def __call__(self, im):
"""
Transforms an image from a numpy array to a PyTorch tensor, applying optional half-precision and normalization.
Args:
im (numpy.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order.
Returns:
(torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized to [0, 1].
"""
im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous
im = torch.from_numpy(im) # to torch
im = im.half() if self.half else im.float() # uint8 to fp16/32
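And a matching sanity check for `ToTensor`, again assuming the class is imported from ultralytics.data.augment (random input, shapes only):

import numpy as np
from ultralytics.data.augment import ToTensor

im = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)  # HWC image in BGR order
t = ToTensor(half=False)(im)
print(t.shape, t.dtype)  # torch.Size([3, 640, 640]) torch.float32, values normalized to [0, 1]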

@ -62,6 +62,7 @@ class BaseDataset(Dataset):
classes=None,
fraction=1.0):
super().__init__()
"""Initialize BaseDataset with given configuration and options."""
self.img_path = img_path
self.imgsz = imgsz
self.augment = augment
@ -256,7 +257,7 @@ class BaseDataset(Dataset):
return len(self.labels)
def update_labels_info(self, label):
"""custom your label format here."""
"""Custom your label format here."""
return label
def build_transforms(self, hyp=None):

@ -20,7 +20,11 @@ from .utils import PIN_MEMORY
class InfiniteDataLoader(dataloader.DataLoader):
"""Dataloader that reuses workers. Uses same syntax as vanilla DataLoader."""
"""
Dataloader that reuses workers.
Uses same syntax as vanilla DataLoader.
"""
def __init__(self, *args, **kwargs):
"""Dataloader that infinitely recycles workers, inherits from DataLoader."""
@ -38,7 +42,9 @@ class InfiniteDataLoader(dataloader.DataLoader):
yield next(self.iterator)
def reset(self):
"""Reset iterator.
"""
Reset iterator.
This is useful when we want to modify settings of dataset while training.
"""
self.iterator = self._get_iterator()
@ -70,7 +76,7 @@ def seed_worker(worker_id): # noqa
def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32):
"""Build YOLO Dataset"""
"""Build YOLO Dataset."""
return YOLODataset(
img_path=img_path,
imgsz=cfg.imgsz,

@ -12,7 +12,8 @@ from ultralytics.utils import TQDM
def coco91_to_coco80_class():
"""Converts 91-index COCO class IDs to 80-index COCO class IDs.
"""
Converts 91-index COCO class IDs to 80-index COCO class IDs.
Returns:
(list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the
@ -51,7 +52,8 @@ def convert_coco(labels_dir='../coco/annotations/',
use_segments=False,
use_keypoints=False,
cls91to80=True):
"""Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
"""
Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
Args:
labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
@ -203,6 +205,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
'helipad': 17}
def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
"""Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
orig_label_path = orig_label_dir / f'{image_name}.txt'
save_path = save_dir / f'{image_name}.txt'

@ -33,6 +33,7 @@ class YOLODataset(BaseDataset):
"""
def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
"""Initializes the YOLODataset with optional configurations for segments and keypoints."""
self.use_segments = use_segments
self.use_keypoints = use_keypoints
self.data = data
@ -40,7 +41,9 @@ class YOLODataset(BaseDataset):
super().__init__(*args, **kwargs)
def cache_labels(self, path=Path('./labels.cache')):
"""Cache dataset labels, check images and read shapes.
"""
Cache dataset labels, check images and read shapes.
Args:
path (Path): path where to save the cache file (default: Path('./labels.cache')).
Returns:
@ -157,7 +160,7 @@ class YOLODataset(BaseDataset):
self.transforms = self.build_transforms(hyp)
def update_labels_info(self, label):
"""custom your label format here."""
"""Custom your label format here."""
# NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
# we can make it also support classification and semantic segmentation by add or remove some dict keys there.
bboxes = label.pop('bboxes')
@ -254,6 +257,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
return {'img': sample, 'cls': j}
def __len__(self) -> int:
"""Return the total number of samples in the dataset."""
return len(self.samples)
def verify_images(self):
@ -320,6 +324,16 @@ def save_dataset_cache_file(prefix, path, x):
# TODO: support semantic segmentation
class SemanticDataset(BaseDataset):
"""
Semantic Segmentation Dataset.
This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
from the BaseDataset class.
Note:
This class is currently a placeholder and needs to be populated with methods and attributes for supporting
semantic segmentation tasks.
"""
def __init__(self):
"""Initialize a SemanticDataset object."""

@ -22,6 +22,7 @@ from ultralytics.utils.checks import check_requirements
@dataclass
class SourceTypes:
"""Class to represent various types of input sources for predictions."""
webcam: bool = False
screenshot: bool = False
from_img: bool = False
@ -29,7 +30,34 @@ class SourceTypes:
class LoadStreams:
"""Stream Loader, i.e. `yolo predict source='rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP, TCP streams`."""
"""
Stream Loader for various types of video streams.
Suitable for use with `yolo predict source='rtsp://example.com/media.mp4'`, supports RTSP, RTMP, HTTP, and TCP streams.
Attributes:
sources (str): The source input paths or URLs for the video streams.
imgsz (int): The image size for processing, defaults to 640.
vid_stride (int): Video frame-rate stride, defaults to 1.
buffer (bool): Whether to buffer input streams, defaults to False.
running (bool): Flag to indicate if the streaming thread is running.
mode (str): Set to 'stream' indicating real-time capture.
imgs (list): List of image frames for each stream.
fps (list): List of FPS for each stream.
frames (list): List of total frames for each stream.
threads (list): List of threads for each stream.
shape (list): List of shapes for each stream.
caps (list): List of cv2.VideoCapture objects for each stream.
bs (int): Batch size for processing.
Methods:
__init__: Initialize the stream loader.
update: Read stream frames in daemon thread.
close: Close stream loader and release resources.
__iter__: Returns an iterator object for the class.
__next__: Returns source paths, transformed, and original images for processing.
__len__: Return the length of the sources object.
"""
def __init__(self, sources='file.streams', imgsz=640, vid_stride=1, buffer=False):
"""Initialize instance variables and check for consistent input stream shapes."""
@ -149,10 +177,33 @@ class LoadStreams:
class LoadScreenshots:
"""YOLOv8 screenshot dataloader, i.e. `yolo predict source=screen`."""
"""
YOLOv8 screenshot dataloader.
This class manages the loading of screenshot images for processing with YOLOv8.
Suitable for use with `yolo predict source=screen`.
Attributes:
source (str): The source input indicating which screen to capture.
imgsz (int): The image size for processing, defaults to 640.
screen (int): The screen number to capture.
left (int): The left coordinate for screen capture area.
top (int): The top coordinate for screen capture area.
width (int): The width of the screen capture area.
height (int): The height of the screen capture area.
mode (str): Set to 'stream' indicating real-time capture.
frame (int): Counter for captured frames.
sct (mss.mss): Screen capture object from `mss` library.
bs (int): Batch size, set to 1.
monitor (dict): Monitor configuration details.
Methods:
__iter__: Returns an iterator object.
__next__: Captures the next screenshot and returns it.
"""
def __init__(self, source, imgsz=640):
"""source = [screen_number left top width height] (pixels)."""
"""Source = [screen_number left top width height] (pixels)."""
check_requirements('mss')
import mss # noqa
@ -192,7 +243,28 @@ class LoadScreenshots:
class LoadImages:
"""YOLOv8 image/video dataloader, i.e. `yolo predict source=image.jpg/vid.mp4`."""
"""
YOLOv8 image/video dataloader.
This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from
various formats, including single image files, video files, and lists of image and video paths.
Attributes:
imgsz (int): Image size, defaults to 640.
files (list): List of image and video file paths.
nf (int): Total number of files (images and videos).
video_flag (list): Flags indicating whether a file is a video (True) or an image (False).
mode (str): Current mode, 'image' or 'video'.
vid_stride (int): Stride for video frame-rate, defaults to 1.
bs (int): Batch size, set to 1 for this class.
cap (cv2.VideoCapture): Video capture object for OpenCV.
frame (int): Frame counter for video.
frames (int): Total number of frames in the video.
count (int): Counter for iteration, initialized at 0 during `__iter__()`.
Methods:
_new_video(path): Create a new cv2.VideoCapture object for a given video path.
"""
def __init__(self, path, imgsz=640, vid_stride=1):
"""Initialize the Dataloader and raise FileNotFoundError if file not found."""
@ -285,6 +357,24 @@ class LoadImages:
class LoadPilAndNumpy:
"""
Load images from PIL and Numpy arrays for batch processing.
This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats.
It performs basic validation and format conversion to ensure that the images are in the required format for
downstream processing.
Attributes:
paths (list): List of image paths or autogenerated filenames.
im0 (list): List of images stored as Numpy arrays.
imgsz (int): Image size, defaults to 640.
mode (str): Type of data being processed, defaults to 'image'.
bs (int): Batch size, equivalent to the length of `im0`.
count (int): Counter for iteration, initialized at 0 during `__iter__()`.
Methods:
_single_check(im): Validate and format a single image to a Numpy array.
"""
def __init__(self, im0, imgsz=640):
"""Initialize PIL and Numpy Dataloader."""
@ -326,8 +416,24 @@ class LoadPilAndNumpy:
class LoadTensor:
"""
Load images from torch.Tensor data.
This class manages the loading and pre-processing of image data from PyTorch tensors for further processing.
Attributes:
im0 (torch.Tensor): The input tensor containing the image(s).
bs (int): Batch size, inferred from the shape of `im0`.
mode (str): Current mode, set to 'image'.
paths (list): List of image paths or filenames.
count (int): Counter for iteration, initialized at 0 during `__iter__()`.
Methods:
_single_check(im, stride): Validate and possibly modify the input tensor.
"""
def __init__(self, im0) -> None:
"""Initialize Tensor Dataloader."""
self.im0 = self._single_check(im0)
self.bs = self.im0.shape[0]
self.mode = 'image'
@ -370,9 +476,7 @@ class LoadTensor:
def autocast_list(source):
"""
Merges a list of source of different types into a list of numpy arrays or PIL images
"""
"""Merges a list of source of different types into a list of numpy arrays or PIL images."""
files = []
for im in source:
if isinstance(im, (str, Path)): # filename or uri

@ -547,9 +547,9 @@ class HUBDatasetStats:
def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
"""
Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the
Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will
not be resized.
Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the Python
Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will not be
resized.
Args:
f (str): The path to the input image file.

@ -986,9 +986,7 @@ class Exporter:
return model
def add_callback(self, event: str, callback):
"""
Appends the given callback.
"""
"""Appends the given callback."""
self.callbacks[event].append(callback)
def run_callbacks(self, event: str):

@ -159,9 +159,7 @@ class Model(nn.Module):
self.overrides['task'] = self.task
def _check_is_pytorch_model(self):
"""
Raises TypeError is model is not a PyTorch model
"""
"""Raises TypeError is model is not a PyTorch model."""
pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == '.pt'
pt_module = isinstance(self.model, nn.Module)
if not (pt_module or pt_str):
@ -173,9 +171,7 @@ class Model(nn.Module):
f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'")
def reset_weights(self):
"""
Resets the model modules parameters to randomly initialized values, losing all training information.
"""
"""Resets the model modules parameters to randomly initialized values, losing all training information."""
self._check_is_pytorch_model()
for m in self.model.modules():
if hasattr(m, 'reset_parameters'):
@ -185,9 +181,7 @@ class Model(nn.Module):
return self
def load(self, weights='yolov8n.pt'):
"""
Transfers parameters with matching names and shapes from 'weights' to model.
"""
"""Transfers parameters with matching names and shapes from 'weights' to model."""
self._check_is_pytorch_model()
if isinstance(weights, (str, Path)):
weights, self.ckpt = attempt_load_one_weight(weights)
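A brief sketch of the usual `load()` pattern: build a model from a YAML definition, then transfer matching weights from a checkpoint.
```python
from ultralytics import YOLO

model = YOLO('yolov8n.yaml')  # build a fresh, randomly initialized model
model.load('yolov8n.pt')      # transfer parameters with matching names and shapes
```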

@ -58,7 +58,7 @@ Example:
class BasePredictor:
"""
BasePredictor
BasePredictor.
A base class for creating predictors.
@ -109,7 +109,8 @@ class BasePredictor:
callbacks.add_integration_callbacks(self)
def preprocess(self, im):
"""Prepares input image before inference.
"""
Prepares input image before inference.
Args:
im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.
@ -128,6 +129,7 @@ class BasePredictor:
return im
def inference(self, im, *args, **kwargs):
"""Runs inference on a given image using the specified model and arguments."""
visualize = increment_path(self.save_dir / Path(self.batch[0][0]).stem,
mkdir=True) if self.args.visualize and (not self.source_type.tensor) else False
return self.model(im, augment=self.args.augment, visualize=visualize)
@ -194,7 +196,11 @@ class BasePredictor:
return list(self.stream_inference(source, model, *args, **kwargs)) # merge list of Result into one
def predict_cli(self, source=None, model=None):
"""Method used for CLI prediction. It uses always generator as outputs as not required by CLI mode."""
"""
Method used for CLI prediction.
It always uses a generator for outputs, since results do not need to be accumulated in CLI mode.
"""
gen = self.stream_inference(source, model)
for _ in gen: # running CLI inference without accumulating any outputs (do not modify)
pass
@ -352,7 +358,5 @@ class BasePredictor:
callback(self)
def add_callback(self, event: str, func):
"""
Add callback
"""
"""Add callback."""
self.callbacks[event].append(func)
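A small sketch of registering a callback from the user-facing model API; the 'on_predict_start' event name is assumed from the Ultralytics callback set:
```python
from ultralytics import YOLO

def announce(predictor):
    """Print the save directory when prediction begins."""
    print(f'Prediction starting, results -> {predictor.save_dir}')

model = YOLO('yolov8n.pt')
model.add_callback('on_predict_start', announce)  # event name assumed, see ultralytics.utils.callbacks
model.predict('https://ultralytics.com/images/bus.jpg')
```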

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Ultralytics Results, Boxes and Masks classes for handling inference results
Ultralytics Results, Boxes and Masks classes for handling inference results.
Usage: See https://docs.ultralytics.com/modes/predict/
"""
@ -19,12 +19,11 @@ from ultralytics.utils.torch_utils import smart_inference_mode
class BaseTensor(SimpleClass):
"""
Base tensor class with additional methods for easy manipulation and device handling.
"""
"""Base tensor class with additional methods for easy manipulation and device handling."""
def __init__(self, data, orig_shape) -> None:
"""Initialize BaseTensor with data and original shape.
"""
Initialize BaseTensor with data and original shape.
Args:
data (torch.Tensor | np.ndarray): Predictions, such as bboxes, masks and keypoints.
@ -126,6 +125,18 @@ class Results(SimpleClass):
self.probs = probs
def _apply(self, fn, *args, **kwargs):
"""
Applies a function to all non-empty attributes and returns a new Results object with modified attributes. This
function is internally called by methods like .to(), .cuda(), .cpu(), etc.
Args:
fn (str): The name of the function to apply.
*args: Variable length argument list to pass to the function.
**kwargs: Arbitrary keyword arguments to pass to the function.
Returns:
Results: A new Results object with attributes modified by the applied function.
"""
r = self.new()
for k in self._keys:
v = getattr(self, k)
@ -250,9 +261,7 @@ class Results(SimpleClass):
return annotator.result()
def verbose(self):
"""
Return log string for each task.
"""
"""Return log string for each task."""
log_string = ''
probs = self.probs
boxes = self.boxes
@ -537,6 +546,7 @@ class Probs(BaseTensor):
"""
def __init__(self, probs, orig_shape=None) -> None:
"""Initialize the Probs class with classification probabilities and optional original shape of the image."""
super().__init__(probs, orig_shape)
@property

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Train a model on a dataset
Train a model on a dataset.
Usage:
$ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16
@ -37,7 +37,7 @@ from ultralytics.utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel,
class BaseTrainer:
"""
BaseTrainer
BaseTrainer.
A base class for creating trainers.
@ -143,15 +143,11 @@ class BaseTrainer:
callbacks.add_integration_callbacks(self)
def add_callback(self, event: str, callback):
"""
Appends the given callback.
"""
"""Appends the given callback."""
self.callbacks[event].append(callback)
def set_callback(self, event: str, callback):
"""
Overrides the existing callbacks with the given callback.
"""
"""Overrides the existing callbacks with the given callback."""
self.callbacks[event] = [callback]
def run_callbacks(self, event: str):
@ -207,9 +203,7 @@ class BaseTrainer:
world_size=world_size)
def _setup_train(self, world_size):
"""
Builds dataloaders and optimizer on correct rank process.
"""
"""Builds dataloaders and optimizer on correct rank process."""
# Model
self.run_callbacks('on_pretrain_routine_start')
@ -450,14 +444,14 @@ class BaseTrainer:
@staticmethod
def get_dataset(data):
"""
Get train, val path from data dict if it exists. Returns None if data format is not recognized.
Get train, val path from data dict if it exists.
Returns None if data format is not recognized.
"""
return data['train'], data.get('val') or data.get('test')
def setup_model(self):
"""
load/create/download model for any task.
"""
"""Load/create/download model for any task."""
if isinstance(self.model, torch.nn.Module): # if model is loaded beforehand. No setup needed
return
@ -482,14 +476,14 @@ class BaseTrainer:
self.ema.update(self.model)
def preprocess_batch(self, batch):
"""
Allows custom preprocessing model inputs and ground truths depending on task type.
"""
"""Allows custom preprocessing model inputs and ground truths depending on task type."""
return batch
def validate(self):
"""
Runs validation on test set using self.validator. The returned dict is expected to contain "fitness" key.
Runs validation on test set using self.validator.
The returned dict is expected to contain the "fitness" key.
"""
metrics = self.validator(self)
fitness = metrics.pop('fitness', -self.loss.detach().cpu().numpy()) # use loss as fitness measure if not found
@ -506,26 +500,20 @@ class BaseTrainer:
raise NotImplementedError('get_validator function not implemented in trainer')
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
"""
Returns dataloader derived from torch.data.Dataloader.
"""
"""Returns dataloader derived from torch.data.Dataloader."""
raise NotImplementedError('get_dataloader function not implemented in trainer')
def build_dataset(self, img_path, mode='train', batch=None):
"""Build dataset"""
"""Build dataset."""
raise NotImplementedError('build_dataset function not implemented in trainer')
def label_loss_items(self, loss_items=None, prefix='train'):
"""
Returns a loss dict with labelled training loss items tensor
"""
"""Returns a loss dict with labelled training loss items tensor."""
# Not needed for classification but necessary for segmentation & detection
return {'loss': loss_items} if loss_items is not None else ['loss']
def set_model_attributes(self):
"""
To set or update model parameters before training.
"""
"""To set or update model parameters before training."""
self.model.names = self.data['names']
def build_targets(self, preds, targets):
@ -632,8 +620,8 @@ class BaseTrainer:
def build_optimizer(self, model, name='auto', lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
"""
Constructs an optimizer for the given model, based on the specified optimizer name, learning rate,
momentum, weight decay, and number of iterations.
Constructs an optimizer for the given model, based on the specified optimizer name, learning rate, momentum,
weight decay, and number of iterations.
Args:
model (torch.nn.Module): The model for which to build an optimizer.

@ -31,32 +31,32 @@ from ultralytics.utils.plotting import plot_tune_results
class Tuner:
"""
Class responsible for hyperparameter tuning of YOLO models.
The class evolves YOLO model hyperparameters over a given number of iterations
by mutating them according to the search space and retraining the model to evaluate their performance.
Attributes:
space (dict): Hyperparameter search space containing bounds and scaling factors for mutation.
tune_dir (Path): Directory where evolution logs and results will be saved.
tune_csv (Path): Path to the CSV file where evolution logs are saved.
Methods:
_mutate(hyp: dict) -> dict:
Mutates the given hyperparameters within the bounds specified in `self.space`.
__call__():
Executes the hyperparameter evolution across multiple iterations.
Example:
Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations.
```python
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
```
"""
def __init__(self, args=DEFAULT_CFG, _callbacks=None):
"""

@ -36,7 +36,7 @@ from ultralytics.utils.torch_utils import de_parallel, select_device, smart_infe
class BaseValidator:
"""
BaseValidator
BaseValidator.
A base class for creating validators.
@ -102,8 +102,7 @@ class BaseValidator:
@smart_inference_mode()
def __call__(self, trainer=None, model=None):
"""
Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
"""Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
gets priority).
"""
self.training = trainer is not None
@ -260,7 +259,7 @@ class BaseValidator:
raise NotImplementedError('get_dataloader function not implemented for this validator')
def build_dataset(self, img_path):
"""Build dataset"""
"""Build dataset."""
raise NotImplementedError('build_dataset function not implemented in validator')
def preprocess(self, batch):

@ -80,8 +80,8 @@ def get_export(model_id='', format='torchscript'):
def check_dataset(path='', task='detect'):
"""
Function for error-checking HUB dataset Zip file before upload. It checks a dataset for errors before it is
uploaded to the HUB. Usage examples are given below.
Function for error-checking HUB dataset Zip file before upload. It checks a dataset for errors before it is uploaded
to the HUB. Usage examples are given below.
Args:
path (str, optional): Path to data.zip (with data.yaml inside data.zip). Defaults to ''.

@ -9,6 +9,19 @@ API_KEY_URL = f'{HUB_WEB_ROOT}/settings?tab=api+keys'
class Auth:
"""
Manages authentication processes including API key handling, cookie-based authentication, and header generation.
The class supports different methods of authentication:
1. Directly using an API key.
2. Authenticating using browser cookies (specifically in Google Colab).
3. Prompting the user to enter an API key.
Attributes:
id_token (str or bool): Token used for identity verification, initialized as False.
api_key (str or bool): API key for authentication, initialized as False.
model_key (bool): Placeholder for model key, initialized as False.
"""
id_token = api_key = model_key = False
def __init__(self, api_key='', verbose=False):
@ -54,7 +67,9 @@ class Auth:
def request_api_key(self, max_attempts=3):
"""
Prompt the user to input their API key. Returns the model ID.
Prompt the user to input their API key.
Returns the model ID.
"""
import getpass
for attempts in range(max_attempts):
@ -86,8 +101,8 @@ class Auth:
def auth_with_cookies(self) -> bool:
"""
Attempt to fetch authentication via cookies and set id_token.
User must be logged in to HUB and running in a supported browser.
Attempt to fetch authentication via cookies and set id_token. User must be logged in to HUB and running in a
supported browser.
Returns:
bool: True if authentication is successful, False otherwise.

@ -84,6 +84,7 @@ class HUBTrainingSession:
def _handle_signal(self, signum, frame):
"""
Handle kill signals and prevent heartbeats from being sent on Colab after termination.
This method does not use the frame argument; it is included only because it is passed by the signal module.
"""
if self.alive is True:

@ -161,9 +161,7 @@ class Events:
url = 'https://www.google-analytics.com/mp/collect?measurement_id=G-X8NCJYTQXM&api_secret=QLQrATrNSwGRFRLE-cbHJw'
def __init__(self):
"""
Initializes the Events object with default values for events, rate_limit, and metadata.
"""
"""Initializes the Events object with default values for events, rate_limit, and metadata."""
self.events = [] # events list
self.rate_limit = 60.0 # rate limit (seconds)
self.t = 0.0 # rate limit timer (seconds)

@ -22,7 +22,7 @@ class FastSAM(Model):
"""
def __init__(self, model='FastSAM-x.pt'):
"""Call the __init__ method of the parent class (YOLO) with the updated default model"""
"""Call the __init__ method of the parent class (YOLO) with the updated default model."""
if str(model) == 'FastSAM.pt':
model = 'FastSAM-x.pt'
assert Path(model).suffix not in ('.yaml', '.yml'), 'FastSAM models only support pre-trained models.'
@ -30,4 +30,5 @@ class FastSAM(Model):
@property
def task_map(self):
"""Returns a dictionary mapping segment task to corresponding predictor and validator classes."""
return {'segment': {'predictor': FastSAMPredictor, 'validator': FastSAMValidator}}

@ -11,10 +11,12 @@ from ultralytics.utils import DEFAULT_CFG, ops
class FastSAMPredictor(DetectionPredictor):
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
"""Initializes FastSAMPredictor class by inheriting from DetectionPredictor and setting task to 'segment'."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'segment'
def postprocess(self, preds, img, orig_imgs):
"""Postprocesses the predictions, applies non-max suppression, scales the boxes, and returns the results."""
p = ops.non_max_suppression(
preds[0],
self.args.conf,

@ -15,6 +15,7 @@ from ultralytics.utils import TQDM
class FastSAMPrompt:
def __init__(self, source, results, device='cuda') -> None:
"""Initializes FastSAMPrompt with given source, results and device, and assigns clip for linear assignment."""
self.device = device
self.results = results
self.source = source
@ -30,6 +31,7 @@ class FastSAMPrompt:
@staticmethod
def _segment_image(image, bbox):
"""Segments the given image according to the provided bounding box coordinates."""
image_array = np.array(image)
segmented_image_array = np.zeros_like(image_array)
x1, y1, x2, y2 = bbox
@ -45,6 +47,9 @@ class FastSAMPrompt:
@staticmethod
def _format_results(result, filter=0):
"""Formats detection results into list of annotations each containing ID, segmentation, bounding box, score and
area.
"""
annotations = []
n = len(result.masks.data) if result.masks is not None else 0
for i in range(n):
@ -61,6 +66,9 @@ class FastSAMPrompt:
@staticmethod
def _get_bbox_from_mask(mask):
"""Applies morphological transformations to the mask, displays it, and if with_contours is True, draws
contours.
"""
mask = mask.astype(np.uint8)
contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
x1, y1, w, h = cv2.boundingRect(contours[0])
@ -195,6 +203,7 @@ class FastSAMPrompt:
@torch.no_grad()
def retrieve(self, model, preprocess, elements, search_text: str, device) -> int:
"""Processes images and text with a model, calculates similarity, and returns softmax score."""
preprocessed_images = [preprocess(image).to(device) for image in elements]
tokenized_text = self.clip.tokenize([search_text]).to(device)
stacked_images = torch.stack(preprocessed_images)
@ -206,6 +215,7 @@ class FastSAMPrompt:
return probs[:, 0].softmax(dim=0)
def _crop_image(self, format_results):
"""Crops an image based on provided annotation format and returns cropped images and related data."""
if os.path.isdir(self.source):
raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
image = Image.fromarray(cv2.cvtColor(self.results[0].orig_img, cv2.COLOR_BGR2RGB))
@ -229,6 +239,7 @@ class FastSAMPrompt:
return cropped_boxes, cropped_images, not_crop, filter_id, annotations
def box_prompt(self, bbox):
"""Modifies the bounding box properties and calculates IoU between masks and bounding box."""
if self.results[0].masks is not None:
assert (bbox[2] != 0 and bbox[3] != 0)
if os.path.isdir(self.source):
@ -261,7 +272,8 @@ class FastSAMPrompt:
self.results[0].masks.data = torch.tensor(np.array([masks[max_iou_index].cpu().numpy()]))
return self.results
def point_prompt(self, points, pointlabel): # numpy 处理
def point_prompt(self, points, pointlabel): # numpy
"""Adjusts points on detected masks based on user input and returns the modified results."""
if self.results[0].masks is not None:
if os.path.isdir(self.source):
raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
@ -284,6 +296,7 @@ class FastSAMPrompt:
return self.results
def text_prompt(self, text):
"""Processes a text prompt, applies it to existing results and returns the updated results."""
if self.results[0].masks is not None:
format_results = self._format_results(self.results[0], 0)
cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
@ -296,4 +309,5 @@ class FastSAMPrompt:
return self.results
def everything_prompt(self):
"""Returns the processed results from the previous methods in the class."""
return self.results
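A compact sketch of the prompt flow, assuming `FastSAM` and `FastSAMPrompt` are exported from `ultralytics.models.fastsam` and that a local 'FastSAM-x.pt' checkpoint and 'image.jpg' exist:
```python
from ultralytics.models.fastsam import FastSAM, FastSAMPrompt

model = FastSAM('FastSAM-x.pt')
everything = model('image.jpg', device='cpu', retina_masks=True, conf=0.4, iou=0.9)

prompt = FastSAMPrompt('image.jpg', everything, device='cpu')
anns = prompt.point_prompt(points=[[200, 200]], pointlabel=[1])  # keep masks containing the point
# anns = prompt.box_prompt(bbox=[100, 100, 400, 400])            # or select by bounding box
# anns = prompt.text_prompt(text='a dog')                        # or by a CLIP text query
print(len(anns))
```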

@ -25,12 +25,13 @@ from .val import NASValidator
class NAS(Model):
def __init__(self, model='yolo_nas_s.pt') -> None:
"""Initializes the NAS model with the provided or default 'yolo_nas_s.pt' model."""
assert Path(model).suffix not in ('.yaml', '.yml'), 'YOLO-NAS models only support pre-trained models.'
super().__init__(model, task='detect')
@smart_inference_mode()
def _load(self, weights: str, task: str):
# Load or create new NAS model
"""Loads an existing NAS model weights or creates a new NAS model with pretrained weights if not provided."""
import super_gradients
suffix = Path(weights).suffix
if suffix == '.pt':
@ -58,4 +59,5 @@ class NAS(Model):
@property
def task_map(self):
"""Returns a dictionary mapping tasks to respective predictor and validator classes."""
return {'detect': {'predictor': NASPredictor, 'validator': NASValidator}}
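A short usage sketch; 'yolo_nas_s.pt' is the default weight name from the initializer above, and the `super-gradients` package must be installed for the backend:
```python
from ultralytics import NAS

model = NAS('yolo_nas_s.pt')
model.info()  # print a model summary
results = model.predict('https://ultralytics.com/images/bus.jpg')
```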

@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
RT-DETR model interface
"""
"""RT-DETR model interface."""
from ultralytics.engine.model import Model
from ultralytics.nn.tasks import RTDETRDetectionModel
@ -11,17 +9,17 @@ from .val import RTDETRValidator
class RTDETR(Model):
"""
RTDETR model interface.
"""
"""RTDETR model interface."""
def __init__(self, model='rtdetr-l.pt') -> None:
"""Initializes the RTDETR model with the given model file, defaulting to 'rtdetr-l.pt'."""
if model and model.split('.')[-1] not in ('pt', 'yaml', 'yml'):
raise NotImplementedError('RT-DETR only supports creating from *.pt file or *.yaml file.')
super().__init__(model=model, task='detect')
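A brief construction-and-inference sketch mirroring the initializer above; the image URL is a placeholder:
```python
from ultralytics import RTDETR

model = RTDETR('rtdetr-l.pt')  # *.pt, *.yaml and *.yml are accepted per the suffix check above
results = model.predict('https://ultralytics.com/images/bus.jpg')
print(results[0].boxes.xyxy)   # detected boxes in xyxy format
```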
@property
def task_map(self):
"""Returns a dictionary mapping task names to corresponding Ultralytics task classes for RTDETR model."""
return {
'detect': {
'predictor': RTDETRPredictor,

@ -48,7 +48,8 @@ class RTDETRPredictor(BasePredictor):
return results
def pre_transform(self, im):
"""Pre-transform input image before inference.
"""
Pre-transform input image before inference.
Args:
im (List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list.

@ -37,7 +37,8 @@ class RTDETRTrainer(DetectionTrainer):
return model
def build_dataset(self, img_path, mode='val', batch=None):
"""Build RTDETR Dataset
"""
Build RTDETR Dataset.
Args:
img_path (str): Path to the folder containing images.

@ -16,6 +16,7 @@ __all__ = 'RTDETRValidator', # tuple or list
class RTDETRDataset(YOLODataset):
def __init__(self, *args, data=None, **kwargs):
"""Initialize the RTDETRDataset class by inheriting from the YOLODataset class."""
super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs)
# NOTE: add stretch version load_image for rtdetr mosaic

@ -32,9 +32,10 @@ def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
def calculate_stability_score(masks: torch.Tensor, mask_threshold: float, threshold_offset: float) -> torch.Tensor:
"""
Computes the stability score for a batch of masks. The stability
score is the IoU between the binary masks obtained by thresholding
the predicted mask logits at high and low values.
Computes the stability score for a batch of masks.
The stability score is the IoU between the binary masks obtained by thresholding the predicted mask logits at high
and low values.
"""
# One mask is always contained inside the other.
# Save memory by preventing unnecessary cast to torch.int64
@ -60,7 +61,11 @@ def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer:
def generate_crop_boxes(im_size: Tuple[int, ...], n_layers: int,
overlap_ratio: float) -> Tuple[List[List[int]], List[int]]:
"""Generates a list of crop boxes of different sizes. Each layer has (2**i)**2 boxes for the ith layer."""
"""
Generates a list of crop boxes of different sizes.
Each layer has (2**i)**2 boxes for the ith layer.
"""
crop_boxes, layer_idxs = [], []
im_h, im_w = im_size
short_side = min(im_h, im_w)
@ -145,8 +150,9 @@ def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tup
def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
"""
Calculates boxes in XYXY format around masks. Return [0,0,0,0] for
an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4.
Calculates boxes in XYXY format around masks.
Return [0,0,0,0] for an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4.
"""
# torch.max below raises an error on empty inputs, just skip in this case
if torch.numel(masks) == 0:

@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
SAM model interface
"""
"""SAM model interface."""
from pathlib import Path
@ -13,16 +11,16 @@ from .predict import Predictor
class SAM(Model):
"""
SAM model interface.
"""
"""SAM model interface."""
def __init__(self, model='sam_b.pt') -> None:
"""Initializes the SAM model instance with the specified pre-trained model file."""
if model and Path(model).suffix not in ('.pt', '.pth'):
raise NotImplementedError('SAM prediction requires pre-trained *.pt or *.pth model.')
super().__init__(model=model, task='segment')
def _load(self, weights: str, task=None):
"""Loads the provided weights into the SAM model."""
self.model = build_sam(weights)
def predict(self, source, stream=False, bboxes=None, points=None, labels=None, **kwargs):
@ -48,4 +46,5 @@ class SAM(Model):
@property
def task_map(self):
"""Returns a dictionary mapping the 'segment' task to its corresponding 'Predictor'."""
return {'segment': {'predictor': Predictor}}
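A short sketch using the prompt arguments exposed by `predict()` above; the coordinates and image path are placeholders:
```python
from ultralytics import SAM

model = SAM('sam_b.pt')
results = model.predict('image.jpg', bboxes=[100, 100, 400, 400])    # box prompt
results = model.predict('image.jpg', points=[300, 200], labels=[1])  # point prompt, 1 = foreground
```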

@ -98,7 +98,11 @@ class MaskDecoder(nn.Module):
sparse_prompt_embeddings: torch.Tensor,
dense_prompt_embeddings: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
"""Predicts masks. See 'forward' for more details."""
"""
Predicts masks.
See 'forward' for more details.
"""
# Concatenate output tokens
output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.size(0), -1, -1)

@ -100,6 +100,9 @@ class ImageEncoderViT(nn.Module):
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
"""Processes input through patch embedding, applies positional embedding if present, and passes through blocks
and neck.
"""
x = self.patch_embed(x)
if self.pos_embed is not None:
x = x + self.pos_embed
@ -157,8 +160,8 @@ class PromptEncoder(nn.Module):
def get_dense_pe(self) -> torch.Tensor:
"""
Returns the positional encoding used to encode point prompts,
applied to a dense set of points the shape of the image encoding.
Returns the positional encoding used to encode point prompts, applied to a dense set of points matching the shape
of the image encoding.
Returns:
torch.Tensor: Positional encoding with shape 1x(embed_dim)x(embedding_h)x(embedding_w)
@ -204,9 +207,7 @@ class PromptEncoder(nn.Module):
boxes: Optional[torch.Tensor],
masks: Optional[torch.Tensor],
) -> int:
"""
Gets the batch size of the output given the batch size of the input prompts.
"""
"""Gets the batch size of the output given the batch size of the input prompts."""
if points is not None:
return points[0].shape[0]
elif boxes is not None:
@ -217,6 +218,7 @@ class PromptEncoder(nn.Module):
return 1
def _get_device(self) -> torch.device:
"""Returns the device of the first point embedding's weight tensor."""
return self.point_embeddings[0].weight.device
def forward(
@ -259,11 +261,10 @@ class PromptEncoder(nn.Module):
class PositionEmbeddingRandom(nn.Module):
"""
Positional encoding using random spatial frequencies.
"""
"""Positional encoding using random spatial frequencies."""
def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = None) -> None:
"""Initializes a position embedding using random spatial frequencies."""
super().__init__()
if scale is None or scale <= 0.0:
scale = 1.0
@ -304,7 +305,7 @@ class PositionEmbeddingRandom(nn.Module):
class Block(nn.Module):
"""Transformer blocks with support of window attention and residual propagation blocks"""
"""Transformer blocks with support of window attention and residual propagation blocks."""
def __init__(
self,
@ -351,6 +352,7 @@ class Block(nn.Module):
self.window_size = window_size
def forward(self, x: torch.Tensor) -> torch.Tensor:
"""Executes a forward pass through the transformer block with window attention and non-overlapping windows."""
shortcut = x
x = self.norm1(x)
# Window partition
@ -404,6 +406,7 @@ class Attention(nn.Module):
self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))
def forward(self, x: torch.Tensor) -> torch.Tensor:
"""Applies the forward operation including attention, normalization, MLP, and indexing within window limits."""
B, H, W, _ = x.shape
# qkv with shape (3, B, nHead, H * W, C)
qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
@ -448,6 +451,7 @@ def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw: Tuple[in
hw: Tuple[int, int]) -> torch.Tensor:
"""
Window unpartition into original sequences and removing padding.
Args:
windows (tensor): input tokens with [B * num_windows, window_size, window_size, C].
window_size (int): window size.
@ -540,9 +544,7 @@ def add_decomposed_rel_pos(
class PatchEmbed(nn.Module):
"""
Image to Patch Embedding.
"""
"""Image to Patch Embedding."""
def __init__(
self,
@ -565,4 +567,5 @@ class PatchEmbed(nn.Module):
self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding)
def forward(self, x: torch.Tensor) -> torch.Tensor:
"""Computes patch embedding by applying convolution and transposing resulting tensor."""
return self.proj(x).permute(0, 2, 3, 1) # B C H W -> B H W C

@ -23,6 +23,9 @@ from ultralytics.utils.instance import to_2tuple
class Conv2d_BN(torch.nn.Sequential):
def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1):
"""Initializes the MBConv model with given input channels, output channels, expansion ratio, activation, and
drop path.
"""
super().__init__()
self.add_module('c', torch.nn.Conv2d(a, b, ks, stride, pad, dilation, groups, bias=False))
bn = torch.nn.BatchNorm2d(b)
@ -34,6 +37,9 @@ class Conv2d_BN(torch.nn.Sequential):
class PatchEmbed(nn.Module):
def __init__(self, in_chans, embed_dim, resolution, activation):
"""Initialize the PatchMerging class with specified input, output dimensions, resolution and activation
function.
"""
super().__init__()
img_size: Tuple[int, int] = to_2tuple(resolution)
self.patches_resolution = (img_size[0] // 4, img_size[1] // 4)
@ -48,12 +54,16 @@ class PatchEmbed(nn.Module):
)
def forward(self, x):
"""Runs input tensor 'x' through the PatchMerging model's sequence of operations."""
return self.seq(x)
class MBConv(nn.Module):
def __init__(self, in_chans, out_chans, expand_ratio, activation, drop_path):
"""Initializes a convolutional layer with specified dimensions, input resolution, depth, and activation
function.
"""
super().__init__()
self.in_chans = in_chans
self.hidden_chans = int(in_chans * expand_ratio)
@ -73,6 +83,7 @@ class MBConv(nn.Module):
self.drop_path = nn.Identity()
def forward(self, x):
"""Implements the forward pass for the model architecture."""
shortcut = x
x = self.conv1(x)
x = self.act1(x)
@ -87,6 +98,9 @@ class MBConv(nn.Module):
class PatchMerging(nn.Module):
def __init__(self, input_resolution, dim, out_dim, activation):
"""Initializes the ConvLayer with specific dimension, input resolution, depth, activation, drop path, and other
optional parameters.
"""
super().__init__()
self.input_resolution = input_resolution
@ -99,6 +113,7 @@ class PatchMerging(nn.Module):
self.conv3 = Conv2d_BN(out_dim, out_dim, 1, 1, 0)
def forward(self, x):
"""Applies forward pass on the input utilizing convolution and activation layers, and returns the result."""
if x.ndim == 3:
H, W = self.input_resolution
B = len(x)
@ -149,6 +164,7 @@ class ConvLayer(nn.Module):
input_resolution, dim=dim, out_dim=out_dim, activation=activation)
def forward(self, x):
"""Processes the input through a series of convolutional layers and returns the activated output."""
for blk in self.blocks:
x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x)
return x if self.downsample is None else self.downsample(x)
@ -157,6 +173,7 @@ class ConvLayer(nn.Module):
class Mlp(nn.Module):
def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
"""Initializes Attention module with the given parameters including dimension, key_dim, number of heads, etc."""
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
@ -167,6 +184,7 @@ class Mlp(nn.Module):
self.drop = nn.Dropout(drop)
def forward(self, x):
"""Applies operations on input x and returns modified x, runs downsample if not None."""
x = self.norm(x)
x = self.fc1(x)
x = self.act(x)
@ -216,6 +234,7 @@ class Attention(torch.nn.Module):
@torch.no_grad()
def train(self, mode=True):
"""Sets the module in training mode and handles attribute 'ab' based on the mode."""
super().train(mode)
if mode and hasattr(self, 'ab'):
del self.ab
@ -298,6 +317,9 @@ class TinyViTBlock(nn.Module):
self.local_conv = Conv2d_BN(dim, dim, ks=local_conv_size, stride=1, pad=pad, groups=dim)
def forward(self, x):
"""Applies attention-based transformation or padding to input 'x' before passing it through a local
convolution.
"""
H, W = self.input_resolution
B, L, C = x.shape
assert L == H * W, 'input feature has wrong size'
@ -337,6 +359,9 @@ class TinyViTBlock(nn.Module):
return x + self.drop_path(self.mlp(x))
def extra_repr(self) -> str:
"""Returns a formatted string representing the TinyViTBlock's parameters: dimension, input resolution, number of
attention heads, window size, and MLP ratio.
"""
return f'dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, ' \
f'window_size={self.window_size}, mlp_ratio={self.mlp_ratio}'
@ -402,23 +427,28 @@ class BasicLayer(nn.Module):
input_resolution, dim=dim, out_dim=out_dim, activation=activation)
def forward(self, x):
"""Performs forward propagation on the input tensor and returns a normalized tensor."""
for blk in self.blocks:
x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x)
return x if self.downsample is None else self.downsample(x)
def extra_repr(self) -> str:
"""Returns a string representation of the extra_repr function with the layer's parameters."""
return f'dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}'
class LayerNorm2d(nn.Module):
"""A PyTorch implementation of Layer Normalization in 2D."""
def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
"""Initialize LayerNorm2d with the number of channels and an optional epsilon."""
super().__init__()
self.weight = nn.Parameter(torch.ones(num_channels))
self.bias = nn.Parameter(torch.zeros(num_channels))
self.eps = eps
def forward(self, x: torch.Tensor) -> torch.Tensor:
"""Perform a forward pass, normalizing the input tensor."""
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
x = (x - u) / torch.sqrt(s + self.eps)
@ -518,6 +548,7 @@ class TinyViT(nn.Module):
)
def set_layer_lr_decay(self, layer_lr_decay):
"""Sets the learning rate decay for each layer in the TinyViT model."""
decay_rate = layer_lr_decay
# layers -> blocks (depth)
@ -525,6 +556,7 @@ class TinyViT(nn.Module):
lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)]
def _set_lr_scale(m, scale):
"""Sets the learning rate scale for each layer in the model based on the layer's depth."""
for p in m.parameters():
p.lr_scale = scale
@ -544,12 +576,14 @@ class TinyViT(nn.Module):
p.param_name = k
def _check_lr_scale(m):
"""Checks if the learning rate scale attribute is present in module's parameters."""
for p in m.parameters():
assert hasattr(p, 'lr_scale'), p.param_name
self.apply(_check_lr_scale)
def _init_weights(self, m):
"""Initializes weights for linear layers and layer normalization in the given module."""
if isinstance(m, nn.Linear):
# NOTE: This initialization is needed only for training.
# trunc_normal_(m.weight, std=.02)
@ -561,11 +595,12 @@ class TinyViT(nn.Module):
@torch.jit.ignore
def no_weight_decay_keywords(self):
"""Returns a dictionary of parameter names where weight decay should not be applied."""
return {'attention_biases'}
def forward_features(self, x):
# x: (N, C, H, W)
x = self.patch_embed(x)
"""Runs the input through the model layers and returns the transformed output."""
x = self.patch_embed(x) # x input is (N, C, H, W)
x = self.layers[0](x)
start_i = 1
@ -579,4 +614,5 @@ class TinyViT(nn.Module):
return self.neck(x)
def forward(self, x):
"""Executes a forward pass on the input tensor through the constructed model layers."""
return self.forward_features(x)

@ -21,8 +21,7 @@ class TwoWayTransformer(nn.Module):
attention_downsample_rate: int = 2,
) -> None:
"""
A transformer decoder that attends to an input image using
queries whose positional embedding is supplied.
A transformer decoder that attends to an input image using queries whose positional embedding is supplied.
Args:
depth (int): number of layers in the transformer
@ -171,8 +170,7 @@ class TwoWayAttentionBlock(nn.Module):
class Attention(nn.Module):
"""
An attention layer that allows for downscaling the size of the embedding after projection to queries, keys, and
"""An attention layer that allows for downscaling the size of the embedding after projection to queries, keys, and
values.
"""

@ -19,6 +19,7 @@ from .build import build_sam
class Predictor(BasePredictor):
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
"""Initializes the Predictor class with default or provided configuration, overrides, and callbacks."""
if overrides is None:
overrides = {}
overrides.update(dict(task='segment', mode='predict', imgsz=1024))
@ -34,7 +35,8 @@ class Predictor(BasePredictor):
self.segment_all = False
def preprocess(self, im):
"""Prepares input image before inference.
"""
Prepares input image before inference.
Args:
im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.
@ -189,7 +191,8 @@ class Predictor(BasePredictor):
stability_score_thresh=0.95,
stability_score_offset=0.95,
crop_nms_thresh=0.7):
"""Segment the whole image.
"""
Segment the whole image.
Args:
im (torch.Tensor): The preprocessed image, (N, C, H, W).
@ -360,14 +363,15 @@ class Predictor(BasePredictor):
self.prompts = prompts
def reset_image(self):
"""Resets the image and its features to None."""
self.im = None
self.features = None
@staticmethod
def remove_small_regions(masks, min_area=0, nms_thresh=0.7):
"""
Removes small disconnected regions and holes in masks, then reruns
box NMS to remove any new duplicates. Requires open-cv as a dependency.
Removes small disconnected regions and holes in masks, then reruns box NMS to remove any new duplicates.
Requires open-cv as a dependency.
Args:
masks (torch.Tensor): Masks, (N, H, W).

@ -47,6 +47,7 @@ class DETRLoss(nn.Module):
self.device = None
def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''):
"""Computes the classification loss based on predictions, target values, and ground truth scores."""
# logits: [b, query, num_classes], gt_class: list[[n, 1]]
name_class = f'loss_class{postfix}'
bs, nq = pred_scores.shape[:2]
@ -68,6 +69,9 @@ class DETRLoss(nn.Module):
return {name_class: loss_cls.squeeze() * self.loss_gain['class']}
def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=''):
"""Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
boxes.
"""
# boxes: [b, query, 4], gt_bbox: list[[n, 4]]
name_bbox = f'loss_bbox{postfix}'
name_giou = f'loss_giou{postfix}'
@ -125,7 +129,7 @@ class DETRLoss(nn.Module):
postfix='',
masks=None,
gt_mask=None):
"""Get auxiliary losses"""
"""Get auxiliary losses."""
# NOTE: loss class, bbox, giou, mask, dice
loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device)
if match_indices is None and self.use_uni_match:
@ -166,12 +170,14 @@ class DETRLoss(nn.Module):
@staticmethod
def _get_index(match_indices):
"""Returns batch indices, source indices, and destination indices from provided match indices."""
batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(match_indices)])
src_idx = torch.cat([src for (src, _) in match_indices])
dst_idx = torch.cat([dst for (_, dst) in match_indices])
return (batch_idx, src_idx), dst_idx
def _get_assigned_bboxes(self, pred_bboxes, gt_bboxes, match_indices):
"""Assigns predicted bounding boxes to ground truth bounding boxes based on the match indices."""
pred_assigned = torch.cat([
t[I] if len(I) > 0 else torch.zeros(0, t.shape[-1], device=self.device)
for t, (I, _) in zip(pred_bboxes, match_indices)])
@ -190,7 +196,7 @@ class DETRLoss(nn.Module):
gt_mask=None,
postfix='',
match_indices=None):
"""Get losses"""
"""Get losses."""
if match_indices is None:
match_indices = self.matcher(pred_bboxes,
pred_scores,
@ -250,22 +256,43 @@ class DETRLoss(nn.Module):
class RTDETRDetectionLoss(DETRLoss):
"""
Real-Time DEtection TRansformer (RT-DETR) Detection Loss class that extends the DETRLoss.
This class computes the detection loss for the RT-DETR model, which includes the standard detection loss as well as
an additional denoising training loss when provided with denoising metadata.
"""
def forward(self, preds, batch, dn_bboxes=None, dn_scores=None, dn_meta=None):
"""
Forward pass to compute the detection loss.
Args:
preds (tuple): Predicted bounding boxes and scores.
batch (dict): Batch data containing ground truth information.
dn_bboxes (torch.Tensor, optional): Denoising bounding boxes. Default is None.
dn_scores (torch.Tensor, optional): Denoising scores. Default is None.
dn_meta (dict, optional): Metadata for denoising. Default is None.
Returns:
(dict): Dictionary containing the total loss and, if applicable, the denoising loss.
"""
pred_bboxes, pred_scores = preds
total_loss = super().forward(pred_bboxes, pred_scores, batch)
# Check for denoising metadata to compute denoising training loss
if dn_meta is not None:
dn_pos_idx, dn_num_group = dn_meta['dn_pos_idx'], dn_meta['dn_num_group']
assert len(batch['gt_groups']) == len(dn_pos_idx)
# Denoising match indices
# Get the match indices for denoising
match_indices = self.get_dn_match_indices(dn_pos_idx, dn_num_group, batch['gt_groups'])
# Compute denoising training loss
# Compute the denoising training loss
dn_loss = super().forward(dn_bboxes, dn_scores, batch, postfix='_dn', match_indices=match_indices)
total_loss.update(dn_loss)
else:
# If no denoising metadata is provided, set denoising loss to zero
total_loss.update({f'{k}_dn': torch.tensor(0., device=self.device) for k in total_loss.keys()})
return total_loss
@ -276,12 +303,12 @@ class RTDETRDetectionLoss(DETRLoss):
Get the match indices for denoising.
Args:
dn_pos_idx (List[torch.Tensor]): A list includes positive indices of denoising.
dn_num_group (int): The number of groups of denoising.
gt_groups (List(int)): a list of batch size length includes the number of gts of each image.
dn_pos_idx (List[torch.Tensor]): List of tensors containing positive indices for denoising.
dn_num_group (int): Number of denoising groups.
gt_groups (List[int]): List of integers representing the number of ground truths for each image.
Returns:
dn_match_indices (List(tuple)): Matched indices.
(List[tuple]): List of tuples containing matched indices for denoising.
"""
dn_match_indices = []
idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)

@ -11,8 +11,8 @@ from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh
class HungarianMatcher(nn.Module):
"""
A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in
an end-to-end fashion.
A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in an
end-to-end fashion.
HungarianMatcher performs optimal assignment over the predicted and ground truth bounding boxes using a cost
function that considers classification scores, bounding box coordinates, and optionally, mask predictions.
@ -32,6 +32,9 @@ class HungarianMatcher(nn.Module):
"""
def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0):
"""Initializes HungarianMatcher with cost coefficients, Focal Loss, mask prediction, sample points, and alpha
gamma factors.
"""
super().__init__()
if cost_gain is None:
cost_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'mask': 1, 'dice': 1}
@ -45,8 +48,8 @@ class HungarianMatcher(nn.Module):
def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None):
"""
Forward pass for HungarianMatcher. This function computes costs based on prediction and ground truth
(classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching
between predictions and ground truth based on these costs.
(classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching between
predictions and ground truth based on these costs.
Args:
pred_bboxes (Tensor): Predicted bounding boxes with shape [batch_size, num_queries, 4].
@ -153,9 +156,9 @@ def get_cdn_group(batch,
box_noise_scale=1.0,
training=False):
"""
Get contrastive denoising training group. This function creates a contrastive denoising training group with
positive and negative samples from the ground truths (gt). It applies noise to the class labels and bounding
box coordinates, and returns the modified labels, bounding boxes, attention mask and meta information.
Get contrastive denoising training group. This function creates a contrastive denoising training group with positive
and negative samples from the ground truths (gt). It applies noise to the class labels and bounding box coordinates,
and returns the modified labels, bounding boxes, attention mask and meta information.
Args:
batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape [num_gts, ]), 'gt_bboxes'
@ -191,12 +194,12 @@ def get_cdn_group(batch,
gt_bbox = batch['bboxes'] # bs*num, 4
b_idx = batch['batch_idx']
# each group has positive and negative queries.
# Each group has positive and negative queries.
dn_cls = gt_cls.repeat(2 * num_group) # (2*num_group*bs*num, )
dn_bbox = gt_bbox.repeat(2 * num_group, 1) # 2*num_group*bs*num, 4
dn_b_idx = b_idx.repeat(2 * num_group).view(-1) # (2*num_group*bs*num, )
# positive and negative mask
# Positive and negative mask
# (bs*num*num_group, ), the second total_num*num_group part as negative samples
neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num
@ -220,10 +223,9 @@ def get_cdn_group(batch,
known_bbox += rand_part * diff
known_bbox.clip_(min=0.0, max=1.0)
dn_bbox = xyxy2xywh(known_bbox)
dn_bbox = inverse_sigmoid(dn_bbox)
dn_bbox = torch.logit(dn_bbox, eps=1e-6) # inverse sigmoid
# total denoising queries
num_dn = int(max_nums * 2 * num_group)
num_dn = int(max_nums * 2 * num_group) # total denoising queries
# class_embed = torch.cat([class_embed, torch.zeros([1, class_embed.shape[-1]], device=class_embed.device)])
dn_cls_embed = class_embed[dn_cls] # bs*num * 2 * num_group, 256
padding_cls = torch.zeros(bs, num_dn, dn_cls_embed.shape[-1], device=gt_cls.device)
@ -256,9 +258,3 @@ def get_cdn_group(batch,
return padding_cls.to(class_embed.device), padding_bbox.to(class_embed.device), attn_mask.to(
class_embed.device), dn_meta
def inverse_sigmoid(x, eps=1e-6):
"""Inverse sigmoid function."""
x = x.clip(min=0., max=1.)
return torch.log(x / (1 - x + eps) + eps)
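The removed helper above and the `torch.logit` call that replaces it both compute log(x / (1 - x)) up to epsilon handling; a quick sketch of the equivalence for values away from the 0/1 boundaries:
```python
import torch

x = torch.tensor([0.1, 0.5, 0.9])
manual = torch.log(x.clip(0., 1.) / (1 - x.clip(0., 1.) + 1e-6) + 1e-6)  # removed inverse_sigmoid
builtin = torch.logit(x, eps=1e-6)                                       # replacement used in get_cdn_group
print(torch.allclose(manual, builtin, atol=1e-4))  # True away from the boundaries
```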

@ -26,6 +26,7 @@ class ClassificationPredictor(BasePredictor):
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
"""Initializes ClassificationPredictor setting the task to 'classify'."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'classify'

@ -79,6 +79,7 @@ class ClassificationTrainer(BaseTrainer):
return ckpt
def build_dataset(self, img_path, mode='train', batch=None):
"""Creates a ClassificationDataset instance given an image path, and mode (train/test etc.)."""
return ClassificationDataset(root=img_path, args=self.args, augment=mode == 'train', prefix=mode)
def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'):
@ -113,8 +114,9 @@ class ClassificationTrainer(BaseTrainer):
def label_loss_items(self, loss_items=None, prefix='train'):
"""
Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for
segmentation & detection
Returns a loss dict with labelled training loss items tensor.
Not needed for classification but necessary for segmentation & detection
"""
keys = [f'{prefix}/{x}' for x in self.loss_names]
if loss_items is None:

@ -78,6 +78,7 @@ class ClassificationValidator(BaseValidator):
return self.metrics.results_dict
def build_dataset(self, img_path):
"""Creates and returns a ClassificationDataset instance using given image path and preprocessing parameters."""
return ClassificationDataset(root=img_path, args=self.args, augment=False, prefix=self.args.split)
def get_dataloader(self, dataset_path, batch_size):

@ -57,7 +57,7 @@ class DetectionTrainer(BaseTrainer):
return batch
def set_model_attributes(self):
"""nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps)."""
"""Nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps)."""
# self.args.box *= 3 / nl # scale to layers
# self.args.cls *= self.data["nc"] / 80 * 3 / nl # scale to classes and layers
# self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl # scale to image size and layers
@ -80,8 +80,9 @@ class DetectionTrainer(BaseTrainer):
def label_loss_items(self, loss_items=None, prefix='train'):
"""
Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for
segmentation & detection
Returns a loss dict with labelled training loss items tensor.
Not needed for classification but necessary for segmentation & detection
"""
keys = [f'{prefix}/{x}' for x in self.loss_names]
if loss_items is not None:

@ -6,13 +6,11 @@ from ultralytics.nn.tasks import ClassificationModel, DetectionModel, PoseModel,
class YOLO(Model):
"""
YOLO (You Only Look Once) object detection model.
"""
"""YOLO (You Only Look Once) object detection model."""
@property
def task_map(self):
"""Map head to model, trainer, validator, and predictor classes"""
"""Map head to model, trainer, validator, and predictor classes."""
return {
'classify': {
'model': ClassificationModel,

@ -21,6 +21,7 @@ class PosePredictor(DetectionPredictor):
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
"""Initializes PosePredictor, sets task to 'pose' and logs a warning for using 'mps' as device."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'pose'
if isinstance(self.args.device, str) and self.args.device.lower() == 'mps':

@ -21,10 +21,12 @@ class SegmentationPredictor(DetectionPredictor):
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
"""Initializes the SegmentationPredictor with the provided configuration, overrides, and callbacks."""
super().__init__(cfg, overrides, _callbacks)
self.args.task = 'segment'
def postprocess(self, preds, img, orig_imgs):
"""Applies non-max suppression and processes detections for each image in an input batch."""
p = ops.non_max_suppression(preds[0],
self.args.conf,
self.args.iou,

@ -144,7 +144,7 @@ class SegmentationValidator(DetectionValidator):
def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False):
"""
Return correct prediction matrix
Return correct prediction matrix.
Args:
detections (array[N, 6]), x1, y1, x2, y2, conf, class

@ -20,7 +20,11 @@ from ultralytics.utils.downloads import attempt_download_asset, is_url
def check_class_names(names):
"""Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts."""
"""
Check class names.
Map imagenet class codes to human-readable names if required. Convert lists to dicts.
"""
if isinstance(names, list): # names is a list
names = dict(enumerate(names)) # convert to dict
if isinstance(names, dict):
@ -37,36 +41,20 @@ def check_class_names(names):
class AutoBackend(nn.Module):
"""
Handles dynamic backend selection for running inference using Ultralytics YOLO models.
@torch.no_grad()
def __init__(self,
weights='yolov8n.pt',
device=torch.device('cpu'),
dnn=False,
data=None,
fp16=False,
fuse=True,
verbose=True):
"""
MultiBackend class for python inference on various platforms using Ultralytics YOLO.
The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide
range of formats, each with specific naming conventions as outlined below:
Args:
weights (str): The path to the weights file. Default: 'yolov8n.pt'
device (torch.device): The device to run the model on.
dnn (bool): Use OpenCV DNN module for inference if True, defaults to False.
data (str | Path | optional): Additional data.yaml file for class names.
fp16 (bool): If True, use half precision. Default: False
fuse (bool): Whether to fuse the model or not. Default: True
verbose (bool): Whether to run in verbose mode or not. Default: True
Supported formats and their naming conventions:
| Format | Suffix |
Supported Formats and Naming Conventions:
| Format | File Suffix |
|-----------------------|------------------|
| PyTorch | *.pt |
| TorchScript | *.torchscript |
| ONNX Runtime | *.onnx |
| ONNX OpenCV DNN | *.onnx dnn=True |
| OpenVINO | *.xml |
| ONNX OpenCV DNN | *.onnx (dnn=True)|
| OpenVINO | *openvino_model/ |
| CoreML | *.mlpackage |
| TensorRT | *.engine |
| TensorFlow SavedModel | *_saved_model |
@ -75,6 +63,31 @@ class AutoBackend(nn.Module):
| TensorFlow Edge TPU | *_edgetpu.tflite |
| PaddlePaddle | *_paddle_model |
| ncnn | *_ncnn_model |
This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy
models across various platforms.
"""
@torch.no_grad()
def __init__(self,
weights='yolov8n.pt',
device=torch.device('cpu'),
dnn=False,
data=None,
fp16=False,
fuse=True,
verbose=True):
"""
Initialize the AutoBackend for inference.
Args:
weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'.
device (torch.device): Device to run the model on. Defaults to CPU.
dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False.
data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional.
fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False.
fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True.
verbose (bool): Enable verbose logging. Defaults to True.
"""
super().__init__()
w = str(weights[0] if isinstance(weights, list) else weights)
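A minimal usage sketch of the backend described above, assuming a local `yolov8n.pt` and using only the methods visible in this diff (`warmup`, `forward`, `names`); decoding of the raw output is task-specific and omitted here.

```python
# Hedged sketch: drive AutoBackend directly (assumes 'yolov8n.pt' exists locally).
import torch

from ultralytics.nn.autobackend import AutoBackend

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
backend = AutoBackend(weights='yolov8n.pt', device=device, fp16=False, fuse=True)

backend.warmup(imgsz=(1, 3, 640, 640))            # one dummy forward pass
im = torch.zeros(1, 3, 640, 640, device=device)   # blank BCHW input tensor
preds = backend.forward(im)                       # raw head output, not yet decoded
print(backend.names)                              # {index: class name} after check_class_names()
```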
@ -440,14 +453,14 @@ class AutoBackend(nn.Module):
def from_numpy(self, x):
"""
Convert a numpy array to a tensor.
Convert a numpy array to a tensor.
Args:
x (np.ndarray): The array to be converted.
Args:
x (np.ndarray): The array to be converted.
Returns:
(torch.Tensor): The converted tensor
"""
Returns:
(torch.Tensor): The converted tensor
"""
return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x
def warmup(self, imgsz=(1, 3, 640, 640)):
@ -476,7 +489,7 @@ class AutoBackend(nn.Module):
@staticmethod
def _model_type(p='path/to/model.pt'):
"""
This function takes a path to a model file and returns the model type
This function takes a path to a model file and returns the model type.
Args:
p: path to the model file. Defaults to path/to/model.pt

@ -1,16 +1,20 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Ultralytics modules. Visualize with:
Ultralytics modules.
from ultralytics.nn.modules import *
import torch
import os
Example:
Visualize a module with Netron.
```python
from ultralytics.nn.modules import *
import torch
import os
x = torch.ones(1, 128, 40, 40)
m = Conv(128, 128)
f = f'{m._get_name()}.onnx'
torch.onnx.export(m, x, f)
os.system(f'onnxsim {f} {f} && open {f}')
x = torch.ones(1, 128, 40, 40)
m = Conv(128, 128)
f = f'{m._get_name()}.onnx'
torch.onnx.export(m, x, f)
os.system(f'onnxsim {f} {f} && open {f}')
```
"""
from .block import (C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, GhostBottleneck,

@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Block modules
"""
"""Block modules."""
import torch
import torch.nn as nn
@ -17,6 +15,7 @@ __all__ = ('DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3', 'C2f', '
class DFL(nn.Module):
"""
Integral module of Distribution Focal Loss (DFL).
Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
"""
@ -51,11 +50,14 @@ class Proto(nn.Module):
class HGStem(nn.Module):
"""StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
"""
StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""
def __init__(self, c1, cm, c2):
"""Initialize the SPP layer with input/output channels and specified kernel sizes for max pooling."""
super().__init__()
self.stem1 = Conv(c1, cm, 3, 2, act=nn.ReLU())
self.stem2a = Conv(cm, cm // 2, 2, 1, 0, act=nn.ReLU())
@ -79,11 +81,14 @@ class HGStem(nn.Module):
class HGBlock(nn.Module):
"""HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
"""
HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""
def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
"""Initializes a CSP Bottleneck with 1 convolution using specified input and output channels."""
super().__init__()
block = LightConv if lightconv else Conv
self.m = nn.ModuleList(block(c1 if i == 0 else cm, cm, k=k, act=act) for i in range(n))
@ -218,6 +223,7 @@ class RepC3(nn.Module):
"""Rep C3."""
def __init__(self, c1, c2, n=3, e=1.0):
"""Initialize CSP Bottleneck with a single convolution using input channels, output channels, and number."""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c2, 1, 1)

@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Convolution modules
"""
"""Convolution modules."""
import math
@ -69,7 +67,9 @@ class Conv2(Conv):
class LightConv(nn.Module):
"""Light convolution with args(ch_in, ch_out, kernel).
"""
Light convolution with args(ch_in, ch_out, kernel).
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""
@ -148,12 +148,15 @@ class GhostConv(nn.Module):
class RepConv(nn.Module):
"""
RepConv is a basic rep-style block, including training and deploy status. This module is used in RT-DETR.
RepConv is a basic rep-style block, including training and deploy status.
This module is used in RT-DETR.
Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
"""
default_act = nn.SiLU() # default activation
def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
"""Initializes Light Convolution layer with inputs, outputs & optional activation function."""
super().__init__()
assert k == 3 and p == 1
self.g = g
@ -166,27 +169,30 @@ class RepConv(nn.Module):
self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)
def forward_fuse(self, x):
"""Forward process"""
"""Forward process."""
return self.act(self.conv(x))
def forward(self, x):
"""Forward process"""
"""Forward process."""
id_out = 0 if self.bn is None else self.bn(x)
return self.act(self.conv1(x) + self.conv2(x) + id_out)
def get_equivalent_kernel_bias(self):
"""Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
kernelid, biasid = self._fuse_bn_tensor(self.bn)
return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
def _pad_1x1_to_3x3_tensor(self, kernel1x1):
"""Pads a 1x1 tensor to a 3x3 tensor."""
if kernel1x1 is None:
return 0
else:
return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
def _fuse_bn_tensor(self, branch):
"""Generates appropriate kernels and biases for convolution by fusing branches of the neural network."""
if branch is None:
return 0, 0
if isinstance(branch, Conv):
@ -214,6 +220,7 @@ class RepConv(nn.Module):
return kernel * t, beta - running_mean * gamma / std
def fuse_convs(self):
"""Combines two convolution layers into a single layer and removes unused attributes from the class."""
if hasattr(self, 'conv'):
return
kernel, bias = self.get_equivalent_kernel_bias()
@ -243,12 +250,14 @@ class ChannelAttention(nn.Module):
"""Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""
def __init__(self, channels: int) -> None:
"""Initializes the class and sets the basic configurations and instance variables required."""
super().__init__()
self.pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
self.act = nn.Sigmoid()
def forward(self, x: torch.Tensor) -> torch.Tensor:
"""Applies forward pass using activation on convolutions of the input, optionally using batch normalization."""
return x * self.act(self.fc(self.pool(x)))
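A small sketch of the re-parameterisation arithmetic behind `get_equivalent_kernel_bias()` and `_pad_1x1_to_3x3_tensor()` above, using stand-in tensors; the identity/BN branch is omitted for brevity.

```python
# Hedged sketch: pad the 1x1 branch to 3x3 and sum with the 3x3 branch so inference
# can run a single fused convolution.
import torch
import torch.nn.functional as F

c_out, c_in = 8, 8
k3, b3 = torch.randn(c_out, c_in, 3, 3), torch.randn(c_out)   # stand-in fused 3x3 kernel/bias
k1, b1 = torch.randn(c_out, c_in, 1, 1), torch.randn(c_out)   # stand-in fused 1x1 kernel/bias

k1_padded = F.pad(k1, [1, 1, 1, 1])           # what _pad_1x1_to_3x3_tensor() does
fused_kernel = k3 + k1_padded
fused_bias = b3 + b1
print(fused_kernel.shape, fused_bias.shape)   # torch.Size([8, 8, 3, 3]) torch.Size([8])
```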

@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Model head modules
"""
"""Model head modules."""
import math
@ -229,6 +227,7 @@ class RTDETRDecoder(nn.Module):
self._reset_parameters()
def forward(self, x, batch=None):
"""Runs the forward pass of the module, returning bounding box and classification scores for the input."""
from ultralytics.models.utils.ops import get_cdn_group
# input projection and embedding
@ -265,6 +264,7 @@ class RTDETRDecoder(nn.Module):
return y if self.export else (y, x)
def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
"""Generates anchor bounding boxes for given shapes with specific grid size and validates them."""
anchors = []
for i, (h, w) in enumerate(shapes):
sy = torch.arange(end=h, dtype=dtype, device=device)
@ -284,6 +284,7 @@ class RTDETRDecoder(nn.Module):
return anchors, valid_mask
def _get_encoder_input(self, x):
"""Processes and returns encoder inputs by getting projection features from input and concatenating them."""
# get projection features
x = [self.input_proj[i](feat) for i, feat in enumerate(x)]
# get encoder inputs
@ -301,6 +302,7 @@ class RTDETRDecoder(nn.Module):
return feats, shapes
def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None):
"""Generates and prepares the input required for the decoder from the provided features and shapes."""
bs = len(feats)
# prepare input for decoder
anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
@ -339,6 +341,7 @@ class RTDETRDecoder(nn.Module):
# TODO
def _reset_parameters(self):
"""Initializes or resets the parameters of the model's various components with predefined weights and biases."""
# class and bbox head init
bias_cls = bias_init_with_prob(0.01) / 80 * self.nc
# NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets.

@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Transformer modules
"""
"""Transformer modules."""
import math
@ -18,9 +16,10 @@ __all__ = ('TransformerEncoderLayer', 'TransformerLayer', 'TransformerBlock', 'M
class TransformerEncoderLayer(nn.Module):
"""Transformer Encoder."""
"""Defines a single layer of the transformer encoder."""
def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
"""Initialize the TransformerEncoderLayer with specified parameters."""
super().__init__()
from ...utils.torch_utils import TORCH_1_9
if not TORCH_1_9:
@ -41,10 +40,11 @@ class TransformerEncoderLayer(nn.Module):
self.normalize_before = normalize_before
def with_pos_embed(self, tensor, pos=None):
"""Add position embeddings if given."""
"""Add position embeddings to the tensor if provided."""
return tensor if pos is None else tensor + pos
def forward_post(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
"""Performs forward pass with post-normalization."""
q = k = self.with_pos_embed(src, pos)
src2 = self.ma(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
src = src + self.dropout1(src2)
@ -54,6 +54,7 @@ class TransformerEncoderLayer(nn.Module):
return self.norm2(src)
def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
"""Performs forward pass with pre-normalization."""
src2 = self.norm1(src)
q = k = self.with_pos_embed(src2, pos)
src2 = self.ma(q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
@ -70,11 +71,14 @@ class TransformerEncoderLayer(nn.Module):
class AIFI(TransformerEncoderLayer):
"""Defines the AIFI transformer layer."""
def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(), normalize_before=False):
"""Initialize the AIFI instance with specified parameters."""
super().__init__(c1, cm, num_heads, dropout, act, normalize_before)
def forward(self, x):
"""Forward pass for the AIFI transformer layer."""
c, h, w = x.shape[1:]
pos_embed = self.build_2d_sincos_position_embedding(w, h, c)
# flatten [B, C, H, W] to [B, HxW, C]
@ -82,7 +86,8 @@ class AIFI(TransformerEncoderLayer):
return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()
@staticmethod
def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.):
def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0):
"""Builds 2D sine-cosine position embedding."""
grid_w = torch.arange(int(w), dtype=torch.float32)
grid_h = torch.arange(int(h), dtype=torch.float32)
grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing='ij')
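A self-contained sketch of a 2D sine-cosine position embedding matching the signature above; the exact interleaving of the sin/cos terms in the library may differ, so treat this as illustrative.

```python
# Hedged sketch of build_2d_sincos_position_embedding (embed_dim must be divisible by 4).
import torch

def sincos_pos_embed_2d(w, h, embed_dim=256, temperature=10000.0):
    grid_w = torch.arange(int(w), dtype=torch.float32)
    grid_h = torch.arange(int(h), dtype=torch.float32)
    grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing='ij')
    pos_dim = embed_dim // 4
    omega = 1.0 / (temperature ** (torch.arange(pos_dim, dtype=torch.float32) / pos_dim))
    out_w = grid_w.flatten()[..., None] @ omega[None]   # (h*w, pos_dim)
    out_h = grid_h.flatten()[..., None] @ omega[None]
    return torch.cat([out_w.sin(), out_w.cos(), out_h.sin(), out_h.cos()], dim=1)[None]

print(sincos_pos_embed_2d(40, 40).shape)  # torch.Size([1, 1600, 256])
```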
@ -140,27 +145,32 @@ class TransformerBlock(nn.Module):
class MLPBlock(nn.Module):
"""Implements a single block of a multi-layer perceptron."""
def __init__(self, embedding_dim, mlp_dim, act=nn.GELU):
"""Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function."""
super().__init__()
self.lin1 = nn.Linear(embedding_dim, mlp_dim)
self.lin2 = nn.Linear(mlp_dim, embedding_dim)
self.act = act()
def forward(self, x: torch.Tensor) -> torch.Tensor:
"""Forward pass for the MLPBlock."""
return self.lin2(self.act(self.lin1(x)))
class MLP(nn.Module):
""" Very simple multi-layer perceptron (also called FFN)"""
"""Implements a simple multi-layer perceptron (also called FFN)."""
def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
"""Initialize the MLP with specified input, hidden, output dimensions and number of layers."""
super().__init__()
self.num_layers = num_layers
h = [hidden_dim] * (num_layers - 1)
self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
def forward(self, x):
"""Forward pass for the entire MLP."""
for i, layer in enumerate(self.layers):
x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
return x
@ -168,17 +178,22 @@ class MLP(nn.Module):
class LayerNorm2d(nn.Module):
"""
LayerNorm2d module from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.
Original implementation at
https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119
"""
def __init__(self, num_channels, eps=1e-6):
"""Initialize LayerNorm2d with the given parameters."""
super().__init__()
self.weight = nn.Parameter(torch.ones(num_channels))
self.bias = nn.Parameter(torch.zeros(num_channels))
self.eps = eps
def forward(self, x):
"""Perform forward pass for 2D layer normalization."""
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
x = (x - u) / torch.sqrt(s + self.eps)
@ -187,11 +202,13 @@ class LayerNorm2d(nn.Module):
class MSDeformAttn(nn.Module):
"""
Original Multi-Scale Deformable Attention Module.
Multi-Scale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.
https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/modules/ms_deform_attn.py
"""
def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4):
"""Initialize MSDeformAttn with the given parameters."""
super().__init__()
if d_model % n_heads != 0:
raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}')
@ -214,6 +231,7 @@ class MSDeformAttn(nn.Module):
self._reset_parameters()
def _reset_parameters(self):
"""Reset module parameters."""
constant_(self.sampling_offsets.weight.data, 0.)
thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads)
grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
@ -232,7 +250,10 @@ class MSDeformAttn(nn.Module):
def forward(self, query, refer_bbox, value, value_shapes, value_mask=None):
"""
Perform forward pass for multi-scale deformable attention.
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
Args:
query (torch.Tensor): [bs, query_length, C]
refer_bbox (torch.Tensor): [bs, query_length, n_levels, 2], range in [0, 1], top-left (0,0),
@ -272,24 +293,27 @@ class MSDeformAttn(nn.Module):
class DeformableTransformerDecoderLayer(nn.Module):
"""
Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/deformable_transformer.py
"""
def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0., act=nn.ReLU(), n_levels=4, n_points=4):
"""Initialize the DeformableTransformerDecoderLayer with the given parameters."""
super().__init__()
# self attention
# Self attention
self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
self.dropout1 = nn.Dropout(dropout)
self.norm1 = nn.LayerNorm(d_model)
# cross attention
# Cross attention
self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points)
self.dropout2 = nn.Dropout(dropout)
self.norm2 = nn.LayerNorm(d_model)
# ffn
# FFN
self.linear1 = nn.Linear(d_model, d_ffn)
self.act = act
self.dropout3 = nn.Dropout(dropout)
@ -299,37 +323,44 @@ class DeformableTransformerDecoderLayer(nn.Module):
@staticmethod
def with_pos_embed(tensor, pos):
"""Add positional embeddings to the input tensor, if provided."""
return tensor if pos is None else tensor + pos
def forward_ffn(self, tgt):
"""Perform forward pass through the Feed-Forward Network part of the layer."""
tgt2 = self.linear2(self.dropout3(self.act(self.linear1(tgt))))
tgt = tgt + self.dropout4(tgt2)
return self.norm3(tgt)
def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None):
# self attention
"""Perform the forward pass through the entire decoder layer."""
# Self attention
q = k = self.with_pos_embed(embed, query_pos)
tgt = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1),
attn_mask=attn_mask)[0].transpose(0, 1)
embed = embed + self.dropout1(tgt)
embed = self.norm1(embed)
# cross attention
# Cross attention
tgt = self.cross_attn(self.with_pos_embed(embed, query_pos), refer_bbox.unsqueeze(2), feats, shapes,
padding_mask)
embed = embed + self.dropout2(tgt)
embed = self.norm2(embed)
# ffn
# FFN
return self.forward_ffn(embed)
class DeformableTransformerDecoder(nn.Module):
"""
Implementation of Deformable Transformer Decoder based on PaddleDetection.
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
"""
def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1):
"""Initialize the DeformableTransformerDecoder with the given parameters."""
super().__init__()
self.layers = _get_clones(decoder_layer, num_layers)
self.num_layers = num_layers
@ -347,6 +378,7 @@ class DeformableTransformerDecoder(nn.Module):
pos_mlp,
attn_mask=None,
padding_mask=None):
"""Perform the forward pass through the entire decoder."""
output = embed
dec_bboxes = []
dec_cls = []

@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Module utils
"""
"""Module utils."""
import copy
import math
@ -16,15 +14,17 @@ __all__ = 'multi_scale_deformable_attn_pytorch', 'inverse_sigmoid'
def _get_clones(module, n):
"""Create a list of cloned modules from the given module."""
return nn.ModuleList([copy.deepcopy(module) for _ in range(n)])
def bias_init_with_prob(prior_prob=0.01):
"""initialize conv/fc bias value according to a given probability value."""
"""Initialize conv/fc bias value according to a given probability value."""
return float(-np.log((1 - prior_prob) / prior_prob)) # return bias_init
def linear_init_(module):
"""Initialize the weights and biases of a linear module."""
bound = 1 / math.sqrt(module.weight.shape[0])
uniform_(module.weight, -bound, bound)
if hasattr(module, 'bias') and module.bias is not None:
@ -32,6 +32,7 @@ def linear_init_(module):
def inverse_sigmoid(x, eps=1e-5):
"""Calculate the inverse sigmoid function for a tensor."""
x = x.clamp(min=0, max=1)
x1 = x.clamp(min=eps)
x2 = (1 - x).clamp(min=eps)
@ -43,6 +44,7 @@ def multi_scale_deformable_attn_pytorch(value: torch.Tensor, value_spatial_shape
attention_weights: torch.Tensor) -> torch.Tensor:
"""
Multi-scale deformable attention.
https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py
"""

@ -25,14 +25,11 @@ except ImportError:
class BaseModel(nn.Module):
"""
The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family.
"""
"""The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family."""
def forward(self, x, *args, **kwargs):
"""
Forward pass of the model on a single scale.
Wrapper for `_forward_once` method.
Forward pass of the model on a single scale. Wrapper for `_forward_once` method.
Args:
x (torch.Tensor | dict): The input image tensor or a dict including image tensor and gt labels.
@ -93,8 +90,8 @@ class BaseModel(nn.Module):
def _profile_one_layer(self, m, x, dt):
"""
Profile the computation time and FLOPs of a single layer of the model on a given input.
Appends the results to the provided list.
Profile the computation time and FLOPs of a single layer of the model on a given input. Appends the results to
the provided list.
Args:
m (nn.Module): The layer to be profiled.
@ -158,7 +155,7 @@ class BaseModel(nn.Module):
def info(self, detailed=False, verbose=True, imgsz=640):
"""
Prints model information
Prints model information.
Args:
detailed (bool): if True, prints out detailed information about the model. Defaults to False
@ -175,7 +172,7 @@ class BaseModel(nn.Module):
fn (function): the function to apply to the model
Returns:
A model that is a Detect() object.
(BaseModel): An updated BaseModel object.
"""
self = super()._apply(fn)
m = self.model[-1] # Detect()
@ -202,7 +199,7 @@ class BaseModel(nn.Module):
def loss(self, batch, preds=None):
"""
Compute loss
Compute loss.
Args:
batch (dict): Batch to compute loss on
@ -215,6 +212,7 @@ class BaseModel(nn.Module):
return self.criterion(preds, batch)
def init_criterion(self):
"""Initialize the loss criterion for the BaseModel."""
raise NotImplementedError('compute_loss() needs to be implemented by task heads')
@ -222,6 +220,7 @@ class DetectionModel(BaseModel):
"""YOLOv8 detection model."""
def __init__(self, cfg='yolov8n.yaml', ch=3, nc=None, verbose=True): # model, input channels, number of classes
"""Initialize the YOLOv8 detection model with the given config and parameters."""
super().__init__()
self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg) # cfg dict
@ -289,6 +288,7 @@ class DetectionModel(BaseModel):
return y
def init_criterion(self):
"""Initialize the loss criterion for the DetectionModel."""
return v8DetectionLoss(self)
@ -300,6 +300,7 @@ class SegmentationModel(DetectionModel):
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
"""Initialize the loss criterion for the SegmentationModel."""
return v8SegmentationLoss(self)
@ -316,6 +317,7 @@ class PoseModel(DetectionModel):
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
"""Initialize the loss criterion for the PoseModel."""
return v8PoseLoss(self)
@ -365,22 +367,59 @@ class ClassificationModel(BaseModel):
m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None)
def init_criterion(self):
"""Compute the classification loss between predictions and true labels."""
"""Initialize the loss criterion for the ClassificationModel."""
return v8ClassificationLoss()
class RTDETRDetectionModel(DetectionModel):
"""
RTDETR (Real-time DEtection and Tracking using Transformers) Detection Model class.
This class is responsible for constructing the RTDETR architecture, defining loss functions, and
facilitating both the training and inference processes. RTDETR is an object detection and tracking model
that extends from the DetectionModel base class.
Attributes:
cfg (str): The configuration file path or preset string. Default is 'rtdetr-l.yaml'.
ch (int): Number of input channels. Default is 3 (RGB).
nc (int, optional): Number of classes for object detection. Default is None.
verbose (bool): Specifies if summary statistics are shown during initialization. Default is True.
Methods:
init_criterion: Initializes the criterion used for loss calculation.
loss: Computes and returns the loss during training.
predict: Performs a forward pass through the network and returns the output.
"""
def __init__(self, cfg='rtdetr-l.yaml', ch=3, nc=None, verbose=True):
"""
Initialize the RTDETRDetectionModel.
Args:
cfg (str): Configuration file name or path.
ch (int): Number of input channels.
nc (int, optional): Number of classes. Defaults to None.
verbose (bool, optional): Print additional information during initialization. Defaults to True.
"""
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
def init_criterion(self):
"""Compute the classification loss between predictions and true labels."""
"""Initialize the loss criterion for the RTDETRDetectionModel."""
from ultralytics.models.utils.loss import RTDETRDetectionLoss
return RTDETRDetectionLoss(nc=self.nc, use_vfl=True)
def loss(self, batch, preds=None):
"""
Compute the loss for the given batch of data.
Args:
batch (dict): Dictionary containing image and label data.
preds (torch.Tensor, optional): Precomputed model predictions. Defaults to None.
Returns:
tuple: A tuple containing the total loss and main three losses in a tensor.
"""
if not hasattr(self, 'criterion'):
self.criterion = self.init_criterion()
@ -417,16 +456,17 @@ class RTDETRDetectionModel(DetectionModel):
def predict(self, x, profile=False, visualize=False, batch=None, augment=False):
"""
Perform a forward pass through the network.
Perform a forward pass through the model.
Args:
x (torch.Tensor): The input tensor to the model
profile (bool): Print the computation time of each layer if True, defaults to False.
visualize (bool): Save the feature maps of the model if True, defaults to False
batch (dict): A dict including gt boxes and labels from dataloader.
x (torch.Tensor): The input tensor.
profile (bool, optional): If True, profile the computation time for each layer. Defaults to False.
visualize (bool, optional): If True, save feature maps for visualization. Defaults to False.
batch (dict, optional): Ground truth data for evaluation. Defaults to None.
augment (bool, optional): If True, perform data augmentation during inference. Defaults to False.
Returns:
(torch.Tensor): The last output of the model.
torch.Tensor: Model's output tensor.
"""
y, dt = [], [] # outputs
for m in self.model[:-1]: # except the head part
@ -708,9 +748,9 @@ def yaml_model_load(path):
def guess_model_scale(model_path):
"""
Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale.
The function uses regular expression matching to find the pattern of the model scale in the YAML file name,
which is denoted by n, s, m, l, or x. The function returns the size character of the model scale as a string.
Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale. The function
uses regular expression matching to find the pattern of the model scale in the YAML file name, which is denoted by
n, s, m, l, or x. The function returns the size character of the model scale as a string.
Args:
model_path (str | Path): The path to the YOLO model's YAML file.
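A hedged sketch of the scale extraction described above; the regex here is an illustrative assumption rather than the library's exact pattern.

```python
# Illustrative sketch: read the n/s/m/l/x scale character from a YAML file name.
import re
from pathlib import Path

def guess_scale(model_path):
    m = re.search(r'yolov\d+([nslmx])', Path(model_path).stem)  # assumed pattern
    return m.group(1) if m else ''

print(guess_scale('yolov8n.yaml'))  # 'n'
print(guess_scale('yolov8x.yaml'))  # 'x'
```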

@ -12,6 +12,33 @@ from .utils.kalman_filter import KalmanFilterXYWH
class BOTrack(STrack):
"""
An extended version of the STrack class for YOLOv8, adding object tracking features.
Attributes:
shared_kalman (KalmanFilterXYWH): A shared Kalman filter for all instances of BOTrack.
smooth_feat (np.ndarray): Smoothed feature vector.
curr_feat (np.ndarray): Current feature vector.
features (deque): A deque to store feature vectors with a maximum length defined by `feat_history`.
alpha (float): Smoothing factor for the exponential moving average of features.
mean (np.ndarray): The mean state of the Kalman filter.
covariance (np.ndarray): The covariance matrix of the Kalman filter.
Methods:
update_features(feat): Update features vector and smooth it using exponential moving average.
predict(): Predicts the mean and covariance using Kalman filter.
re_activate(new_track, frame_id, new_id): Reactivates a track with updated features and optionally new ID.
update(new_track, frame_id): Update the YOLOv8 instance with new track and frame ID.
tlwh: Property that gets the current position in tlwh format `(top left x, top left y, width, height)`.
multi_predict(stracks): Predicts the mean and covariance of multiple object tracks using shared Kalman filter.
convert_coords(tlwh): Converts tlwh bounding box coordinates to xywh format.
tlwh_to_xywh(tlwh): Convert bounding box to xywh format `(center x, center y, width, height)`.
Usage:
bo_track = BOTrack(tlwh, score, cls, feat)
bo_track.predict()
bo_track.update(new_track, frame_id)
"""
shared_kalman = KalmanFilterXYWH()
def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
@ -59,9 +86,7 @@ class BOTrack(STrack):
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
"""Get current position in bounding box format `(top left x, top left y, width, height)`."""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
@ -90,15 +115,37 @@ class BOTrack(STrack):
@staticmethod
def tlwh_to_xywh(tlwh):
"""Convert bounding box to format `(center x, center y, width,
height)`.
"""
"""Convert bounding box to format `(center x, center y, width, height)`."""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
return ret
class BOTSORT(BYTETracker):
"""
An extended version of the BYTETracker class for YOLOv8, designed for object tracking with ReID and GMC algorithm.
Attributes:
proximity_thresh (float): Threshold for spatial proximity (IoU) between tracks and detections.
appearance_thresh (float): Threshold for appearance similarity (ReID embeddings) between tracks and detections.
encoder (object): Object to handle ReID embeddings, set to None if ReID is not enabled.
gmc (GMC): An instance of the GMC algorithm for data association.
args (object): Parsed command-line arguments containing tracking parameters.
Methods:
get_kalmanfilter(): Returns an instance of KalmanFilterXYWH for object tracking.
init_track(dets, scores, cls, img): Initialize track with detections, scores, and classes.
get_dists(tracks, detections): Get distances between tracks and detections using IoU and (optionally) ReID.
multi_predict(tracks): Predict and track multiple objects with YOLOv8 model.
Usage:
bot_sort = BOTSORT(args, frame_rate)
bot_sort.init_track(dets, scores, cls, img)
bot_sort.multi_predict(tracks)
Note:
The class is designed to work with the YOLOv8 object detection model and supports ReID only if enabled via args.
"""
def __init__(self, args, frame_rate=30):
"""Initialize YOLOv8 object with ReID module and GMC algorithm."""

@ -8,10 +8,43 @@ from .utils.kalman_filter import KalmanFilterXYAH
class STrack(BaseTrack):
"""
Single object tracking representation that uses Kalman filtering for state estimation.
This class is responsible for storing all the information regarding individual tracklets and performs state updates
and predictions based on Kalman filter.
Attributes:
shared_kalman (KalmanFilterXYAH): Shared Kalman filter that is used across all STrack instances for prediction.
_tlwh (np.ndarray): Private attribute to store top-left corner coordinates and width and height of bounding box.
kalman_filter (KalmanFilterXYAH): Instance of Kalman filter used for this particular object track.
mean (np.ndarray): Mean state estimate vector.
covariance (np.ndarray): Covariance of state estimate.
is_activated (bool): Boolean flag indicating if the track has been activated.
score (float): Confidence score of the track.
tracklet_len (int): Length of the tracklet.
cls (any): Class label for the object.
idx (int): Index or identifier for the object.
frame_id (int): Current frame ID.
start_frame (int): Frame where the object was first detected.
Methods:
predict(): Predict the next state of the object using Kalman filter.
multi_predict(stracks): Predict the next states for multiple tracks.
multi_gmc(stracks, H): Update multiple track states using a homography matrix.
activate(kalman_filter, frame_id): Activate a new tracklet.
re_activate(new_track, frame_id, new_id): Reactivate a previously lost tracklet.
update(new_track, frame_id): Update the state of a matched track.
convert_coords(tlwh): Convert bounding box to x-y-angle-height format.
tlwh_to_xyah(tlwh): Convert tlwh bounding box to xyah format.
tlbr_to_tlwh(tlbr): Convert tlbr bounding box to tlwh format.
tlwh_to_tlbr(tlwh): Convert tlwh bounding box to tlbr format.
"""
shared_kalman = KalmanFilterXYAH()
def __init__(self, tlwh, score, cls):
"""wait activate."""
"""Initialize new STrack instance."""
self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32)
self.kalman_filter = None
self.mean, self.covariance = None, None
@ -92,10 +125,11 @@ class STrack(BaseTrack):
def update(self, new_track, frame_id):
"""
Update a matched track
:type new_track: STrack
:type frame_id: int
:return:
Update the state of a matched track.
Args:
new_track (STrack): The new track containing updated information.
frame_id (int): The ID of the current frame.
"""
self.frame_id = frame_id
self.tracklet_len += 1
@ -116,9 +150,7 @@ class STrack(BaseTrack):
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
"""Get current position in bounding box format (top left x, top left y, width, height)."""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
@ -128,17 +160,15 @@ class STrack(BaseTrack):
@property
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
"""Convert bounding box to format (min x, min y, max x, max y), i.e., (top left, bottom right)."""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""Convert bounding box to format (center x, center y, aspect ratio, height), where the aspect ratio is width /
height.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
@ -165,6 +195,33 @@ class STrack(BaseTrack):
class BYTETracker:
"""
BYTETracker: A tracking algorithm built on top of YOLOv8 for object detection and tracking.
The class is responsible for initializing, updating, and managing the tracks for detected objects in a video
sequence. It maintains the state of tracked, lost, and removed tracks over frames, utilizes Kalman filtering for
predicting the new object locations, and performs data association.
Attributes:
tracked_stracks (list[STrack]): List of successfully activated tracks.
lost_stracks (list[STrack]): List of lost tracks.
removed_stracks (list[STrack]): List of removed tracks.
frame_id (int): The current frame ID.
args (namespace): Command-line arguments.
max_time_lost (int): The maximum frames for a track to be considered as 'lost'.
kalman_filter (object): Kalman Filter object.
Methods:
update(results, img=None): Updates object tracker with new detections.
get_kalmanfilter(): Returns a Kalman filter object for tracking bounding boxes.
init_track(dets, scores, cls, img=None): Initialize object tracking with detections.
get_dists(tracks, detections): Calculates the distance between tracks and detections.
multi_predict(tracks): Predicts the location of tracks.
reset_id(): Resets the ID counter of STrack.
joint_stracks(tlista, tlistb): Combines two lists of stracks.
sub_stracks(tlista, tlistb): Filters out the stracks present in the second list from the first list.
remove_duplicate_stracks(stracksa, stracksb): Removes duplicate stracks based on IOU.
"""
def __init__(self, args, frame_rate=30):
"""Initialize a YOLOv8 object to track objects with given arguments and frame rate."""
@ -234,8 +291,7 @@ class BYTETracker:
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
# Step 3: Second association, with low score detection boxes
# association the untrack to the low score detections
# Step 3: Second association, matching the remaining unmatched tracks against the low score detection boxes
detections_second = self.init_track(dets_second, scores_second, cls_second, img)
r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
# TODO
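For reference, the box-format conversions named in the STrack docstring, written out in plain NumPy with illustrative values.

```python
# tlwh = top-left x/y + width/height, tlbr = top-left + bottom-right,
# xyah = centre x/y + aspect ratio (w/h) + height.
import numpy as np

tlwh = np.array([10.0, 20.0, 30.0, 60.0])

tlbr = tlwh.copy()
tlbr[2:] += tlbr[:2]        # -> [10. 20. 40. 80.]

xyah = tlwh.copy()
xyah[:2] += xyah[2:] / 2    # centre x, centre y
xyah[2] /= xyah[3]          # aspect ratio
print(tlbr, xyah)           # [10. 20. 40. 80.] [25. 50.  0.5 60.]
```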

@ -60,7 +60,6 @@ def register_tracker(model, persist):
Args:
model (object): The model object to register tracking callbacks for.
persist (bool): Whether to persist the trackers if they already exist.
"""
model.add_callback('on_predict_start', partial(on_predict_start, persist=persist))
model.add_callback('on_predict_postprocess_end', on_predict_postprocess_end)

@ -9,6 +9,29 @@ from ultralytics.utils import LOGGER
class GMC:
"""
Generalized Motion Compensation (GMC) class for tracking and object detection in video frames.
This class provides methods for tracking and detecting objects based on several tracking algorithms including ORB,
SIFT, ECC, and Sparse Optical Flow. It also supports downscaling of frames for computational efficiency.
Attributes:
method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
downscale (int): Factor by which to downscale the frames for processing.
prevFrame (np.array): Stores the previous frame for tracking.
prevKeyPoints (list): Stores the keypoints from the previous frame.
prevDescriptors (np.array): Stores the descriptors from the previous frame.
initializedFirstFrame (bool): Flag to indicate if the first frame has been processed.
Methods:
__init__(self, method='sparseOptFlow', downscale=2): Initializes a GMC object with the specified method
and downscale factor.
apply(self, raw_frame, detections=None): Applies the chosen method to a raw frame and optionally uses
provided detections.
applyEcc(self, raw_frame, detections=None): Applies the ECC algorithm to a raw frame.
applyFeatures(self, raw_frame, detections=None): Applies feature-based methods like ORB or SIFT to a raw frame.
applySparseOptFlow(self, raw_frame, detections=None): Applies the Sparse Optical Flow method to a raw frame.
"""
def __init__(self, method='sparseOptFlow', downscale=2):
"""Initialize a video tracker with specified parameters."""

@ -8,8 +8,8 @@ class KalmanFilterXYAH:
"""
For bytetrack. A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y),
aspect ratio a, height h, and their respective velocities.
The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y), aspect
ratio a, height h, and their respective velocities.
Object motion follows a constant velocity model. The bounding box location (x, y, a, h) is taken as direct
observation of the state space (linear observation model).
@ -182,8 +182,8 @@ class KalmanFilterXYAH:
def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'):
"""
Compute gating distance between state distribution and measurements. A suitable distance threshold can be
obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of freedom,
otherwise 2.
Parameters
----------
@ -223,8 +223,8 @@ class KalmanFilterXYWH(KalmanFilterXYAH):
"""
For BoT-SORT. A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y),
width w, height h, and their respective velocities.
The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y), width
w, height h, and their respective velocities.
Object motion follows a constant velocity model. The bounding box location (x, y, w, h) is taken as direct
observation of the state space (linear observation model).
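A small sketch of the constant-velocity transition described above for the 8-dimensional (x, y, a, h, vx, vy, va, vh) state; the block layout mirrors the docstring, not necessarily the filter's internal matrices.

```python
# Hedged sketch: position components advance by their velocities each step.
import numpy as np

ndim, dt = 4, 1.0
F = np.eye(2 * ndim)          # 8x8 state-transition matrix
for i in range(ndim):
    F[i, ndim + i] = dt       # x <- x + vx*dt, and likewise for y, a, h

state = np.array([320.0, 240.0, 0.5, 80.0, 2.0, -1.0, 0.0, 0.5])
print(F @ state)              # [322. 239. 0.5 80.5 2. -1. 0. 0.5]
```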

@ -117,6 +117,7 @@ class TQDM(tqdm_original):
"""
def __init__(self, *args, **kwargs):
"""Initialize custom Ultralytics tqdm class with different default arguments."""
# Set new default values (these can still be overridden when calling TQDM)
kwargs['disable'] = not VERBOSE or kwargs.get('disable', False) # disable if not VERBOSE or if explicitly requested
kwargs.setdefault('bar_format', TQDM_BAR_FORMAT) # override default value if passed
@ -124,8 +125,7 @@ class TQDM(tqdm_original):
class SimpleClass:
"""
Ultralytics SimpleClass is a base class providing helpful string representation, error reporting, and attribute
"""Ultralytics SimpleClass is a base class providing helpful string representation, error reporting, and attribute
access methods for easier debugging and usage.
"""
@ -154,8 +154,7 @@ class SimpleClass:
class IterableSimpleNamespace(SimpleNamespace):
"""
Ultralytics IterableSimpleNamespace is an extension class of SimpleNamespace that adds iterable functionality and
"""Ultralytics IterableSimpleNamespace is an extension class of SimpleNamespace that adds iterable functionality and
enables usage with dict() and for loops.
"""
@ -256,8 +255,8 @@ class EmojiFilter(logging.Filter):
"""
A custom logging filter class for removing emojis in log messages.
This filter is particularly useful for ensuring compatibility with Windows terminals
that may not support the display of emojis in log messages.
This filter is particularly useful for ensuring compatibility with Windows terminals that may not support the
display of emojis in log messages.
"""
def filter(self, record):
@ -275,9 +274,9 @@ if WINDOWS: # emoji-safe logging
class ThreadingLocked:
"""
A decorator class for ensuring thread-safe execution of a function or method.
This class can be used as a decorator to make sure that if the decorated function
is called from multiple threads, only one thread at a time will be able to execute the function.
A decorator class for ensuring thread-safe execution of a function or method. This class can be used as a decorator
to make sure that if the decorated function is called from multiple threads, only one thread at a time will be able
to execute the function.
Attributes:
lock (threading.Lock): A lock object used to manage access to the decorated function.
@ -294,13 +293,16 @@ class ThreadingLocked:
"""
def __init__(self):
"""Initializes the decorator class for thread-safe execution of a function or method."""
self.lock = threading.Lock()
def __call__(self, f):
"""Run thread-safe execution of function or method."""
from functools import wraps
@wraps(f)
def decorated(*args, **kwargs):
"""Applies thread-safety to the decorated function or method."""
with self.lock:
return f(*args, **kwargs)
@ -424,8 +426,7 @@ def is_kaggle():
def is_jupyter():
"""
Check if the current script is running inside a Jupyter Notebook.
Verified on Colab, Jupyterlab, Kaggle, Paperspace.
Check if the current script is running inside a Jupyter Notebook. Verified on Colab, Jupyterlab, Kaggle, Paperspace.
Returns:
(bool): True if running inside a Jupyter Notebook, False otherwise.
@ -529,8 +530,8 @@ def is_github_actions_ci() -> bool:
def is_git_dir():
"""
Determines whether the current file is part of a git repository.
If the current file is not part of a git repository, returns None.
Determines whether the current file is part of a git repository. If the current file is not part of a git
repository, returns None.
Returns:
(bool): True if current file is part of a git repository.
@ -540,8 +541,8 @@ def is_git_dir():
def get_git_dir():
"""
Determines whether the current file is part of a git repository and if so, returns the repository root directory.
If the current file is not part of a git repository, returns None.
Determines whether the current file is part of a git repository and if so, returns the repository root directory. If
the current file is not part of a git repository, returns None.
Returns:
(Path | None): Git root directory if found or None if not found.
@ -578,7 +579,8 @@ def get_git_branch():
def get_default_args(func):
"""Returns a dictionary of default arguments for a function.
"""
Returns a dictionary of default arguments for a function.
Args:
func (callable): The function to inspect.
@ -710,7 +712,11 @@ def remove_colorstr(input_string):
class TryExcept(contextlib.ContextDecorator):
"""YOLOv8 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager."""
"""
YOLOv8 TryExcept class.
Use as @TryExcept() decorator or 'with TryExcept():' context manager.
"""
def __init__(self, msg='', verbose=True):
"""Initialize TryExcept class with optional message and verbosity settings."""
@ -729,7 +735,11 @@ class TryExcept(contextlib.ContextDecorator):
def threaded(func):
"""Multi-threads a target function and returns thread. Usage: @threaded decorator."""
"""
Multi-threads a target function and returns thread.
Use as @threaded decorator.
"""
def wrapper(*args, **kwargs):
"""Multi-threads a given function and returns the thread."""
@ -824,6 +834,9 @@ class SettingsManager(dict):
"""
def __init__(self, file=SETTINGS_YAML, version='0.0.4'):
"""Initialize the SettingsManager with default settings, load and validate current settings from the YAML
file.
"""
import copy
import hashlib

@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch.
"""
"""Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch."""
from copy import deepcopy

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Benchmark a YOLO model formats for speed and accuracy
Benchmark a YOLO model formats for speed and accuracy.
Usage:
from ultralytics.utils.benchmarks import ProfileModels, benchmark
@ -194,6 +194,7 @@ class ProfileModels:
self.device = device or torch.device(0 if torch.cuda.is_available() else 'cpu')
def profile(self):
"""Logs the benchmarking results of a model, checks metrics against floor and returns the results."""
files = self.get_files()
if not files:
@ -235,6 +236,7 @@ class ProfileModels:
return output
def get_files(self):
"""Returns a list of paths for all relevant model files given by the user."""
files = []
for path in self.paths:
path = Path(path)
@ -250,10 +252,14 @@ class ProfileModels:
return [Path(file) for file in sorted(files)]
def get_onnx_model_info(self, onnx_file: str):
"""Retrieves the information including number of layers, parameters, gradients and FLOPs for an ONNX model
file.
"""
# return (num_layers, num_params, num_gradients, num_flops)
return 0.0, 0.0, 0.0, 0.0
def iterative_sigma_clipping(self, data, sigma=2, max_iters=3):
"""Applies an iterative sigma clipping algorithm to the given data times number of iterations."""
data = np.array(data)
for _ in range(max_iters):
mean, std = np.mean(data), np.std(data)
@ -264,6 +270,7 @@ class ProfileModels:
return data
def profile_tensorrt_model(self, engine_file: str, eps: float = 1e-3):
"""Profiles the TensorRT model, measuring average run time and standard deviation among runs."""
if not self.trt or not Path(engine_file).is_file():
return 0.0, 0.0
@ -292,6 +299,9 @@ class ProfileModels:
return np.mean(run_times), np.std(run_times)
def profile_onnx_model(self, onnx_file: str, eps: float = 1e-3):
"""Profiles an ONNX model by executing it multiple times and returns the mean and standard deviation of run
times.
"""
check_requirements('onnxruntime')
import onnxruntime as ort
@ -344,10 +354,12 @@ class ProfileModels:
return np.mean(run_times), np.std(run_times)
def generate_table_row(self, model_name, t_onnx, t_engine, model_info):
"""Generates a formatted string for a table row that includes model performance and metric details."""
layers, params, gradients, flops = model_info
return f'| {model_name:18s} | {self.imgsz} | - | {t_onnx[0]:.2f} ± {t_onnx[1]:.2f} ms | {t_engine[0]:.2f} ± {t_engine[1]:.2f} ms | {params / 1e6:.1f} | {flops:.1f} |'
def generate_results_dict(self, model_name, t_onnx, t_engine, model_info):
"""Generates a dictionary of model details including name, parameters, GFLOPS and speed metrics."""
layers, params, gradients, flops = model_info
return {
'model/name': model_name,
@ -357,6 +369,7 @@ class ProfileModels:
'model/speed_TensorRT(ms)': round(t_engine[0], 3)}
def print_table(self, table_rows):
"""Formats and prints a comparison table for different models with given statistics and performance data."""
gpu = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'GPU'
header = f'| Model | size<br><sup>(pixels) | mAP<sup>val<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>{gpu} TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |'
separator = '|-------------|---------------------|--------------------|------------------------------|-----------------------------------|------------------|-----------------|'

@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Base callbacks
"""
"""Base callbacks."""
from collections import defaultdict
from copy import deepcopy

@ -26,31 +26,38 @@ except (ImportError, AssertionError):
def _get_comet_mode():
"""Returns the mode of comet set in the environment variables, defaults to 'online' if not set."""
return os.getenv('COMET_MODE', 'online')
def _get_comet_model_name():
"""Returns the model name for Comet from the environment variable 'COMET_MODEL_NAME' or defaults to 'YOLOv8'."""
return os.getenv('COMET_MODEL_NAME', 'YOLOv8')
def _get_eval_batch_logging_interval():
"""Get the evaluation batch logging interval from environment variable or use default value 1."""
return int(os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1))
def _get_max_image_predictions_to_log():
"""Get the maximum number of image predictions to log from the environment variables."""
return int(os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100))
def _scale_confidence_score(score):
"""Scales the given confidence score by a factor specified in an environment variable."""
scale = float(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100.0))
return score * scale
def _should_log_confusion_matrix():
"""Determines if the confusion matrix should be logged based on the environment variable settings."""
return os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'false').lower() == 'true'
def _should_log_image_predictions():
"""Determines whether to log image predictions based on a specified environment variable."""
return os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true').lower() == 'true'
@ -104,9 +111,10 @@ def _fetch_trainer_metadata(trainer):
def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad):
"""YOLOv8 resizes images during training and the label values
are normalized based on this resized shape. This function rescales the
bounding box labels to the original image shape.
"""
YOLOv8 resizes images during training and the label values are normalized based on this resized shape.
This function rescales the bounding box labels to the original image shape.
"""
resized_image_height, resized_image_width = resized_image_shape
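A hedged, stand-alone sketch of the rescaling idea: undo the letterbox padding and the resize ratio to map a box from the resized training image back to original-image pixels. All variable names and values here are illustrative, not the callback's own.

```python
# Illustrative letterbox inversion (640x640 resized image, 1280x720 original).
resized_h, resized_w = 640, 640
orig_h, orig_w = 720, 1280
ratio = min(resized_h / orig_h, resized_w / orig_w)                 # letterbox scale factor (0.5)
pad_w = (resized_w - orig_w * ratio) / 2                            # horizontal padding (0)
pad_h = (resized_h - orig_h * ratio) / 2                            # vertical padding (140)

x1, y1, x2, y2 = 160.0, 192.0, 480.0, 448.0                         # box in resized-image pixels
orig_box = [(x1 - pad_w) / ratio, (y1 - pad_h) / ratio,
            (x2 - pad_w) / ratio, (y2 - pad_h) / ratio]
print(orig_box)                                                     # [320.0, 104.0, 960.0, 616.0]
```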

@ -25,6 +25,7 @@ except (ImportError, AssertionError, TypeError):
def _log_images(path, prefix=''):
"""Logs images at specified path with an optional prefix using DVCLive."""
if live:
name = path.name
@ -38,6 +39,7 @@ def _log_images(path, prefix=''):
def _log_plots(plots, prefix=''):
"""Logs plot images for training progress if they have not been previously processed."""
for name, params in plots.items():
timestamp = params['timestamp']
if _processed_plots.get(name) != timestamp:
@ -46,6 +48,7 @@ def _log_plots(plots, prefix=''):
def _log_confusion_matrix(validator):
"""Logs the confusion matrix for the given validator using DVCLive."""
targets = []
preds = []
matrix = validator.confusion_matrix.matrix
@ -62,6 +65,7 @@ def _log_confusion_matrix(validator):
def on_pretrain_routine_start(trainer):
"""Initializes DVCLive logger for training metadata during pre-training routine."""
try:
global live
live = dvclive.Live(save_dvc_exp=True, cache_images=True)
@ -71,20 +75,24 @@ def on_pretrain_routine_start(trainer):
def on_pretrain_routine_end(trainer):
"""Logs plots related to the training process at the end of the pretraining routine."""
_log_plots(trainer.plots, 'train')
def on_train_start(trainer):
"""Logs the training parameters if DVCLive logging is active."""
if live:
live.log_params(trainer.args)
def on_train_epoch_start(trainer):
"""Sets the global variable _training_epoch value to True at the start of training each epoch."""
global _training_epoch
_training_epoch = True
def on_fit_epoch_end(trainer):
"""Logs training metrics and model info, and advances to next step on the end of each fit epoch."""
global _training_epoch
if live and _training_epoch:
all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr}
@ -104,6 +112,7 @@ def on_fit_epoch_end(trainer):
def on_train_end(trainer):
"""Logs the best metrics, plots, and confusion matrix at the end of training if DVCLive is active."""
if live:
# At the end log the best metrics. It runs validator on the best model internally.
all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr}

@ -31,14 +31,13 @@ def _log_images(imgs_dict, group=''):
def _log_plot(title, plot_path):
"""Log plots to the NeptuneAI experiment logger."""
"""
Log image as plot in the plot section of NeptuneAI
Log plots to the NeptuneAI experiment logger.
arguments:
title (str) Title of the plot
plot_path (PosixPath or str) Path to the saved image file
"""
Args:
title (str): Title of the plot.
plot_path (PosixPath | str): Path to the saved image file.
"""
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

@ -17,6 +17,7 @@ except (ImportError, AssertionError):
def _log_plots(plots, step):
"""Logs plots from the input dictionary if they haven't been logged already at the specified step."""
for name, params in plots.items():
timestamp = params['timestamp']
if _processed_plots.get(name) != timestamp:

@ -64,8 +64,8 @@ def parse_requirements(file_path=ROOT.parent / 'requirements.txt', package=''):
def parse_version(version='0.0.0') -> tuple:
"""
Convert a version string to a tuple of integers, ignoring any extra non-numeric string attached to the version.
This function replaces deprecated 'pkg_resources.parse_version(v)'
Convert a version string to a tuple of integers, ignoring any extra non-numeric string attached to the version. This
function replaces deprecated 'pkg_resources.parse_version(v)'.
Args:
version (str): Version string, i.e. '2.0.1+cpu'
@ -372,8 +372,10 @@ def check_torchvision():
Checks the installed versions of PyTorch and Torchvision to ensure they're compatible.
This function checks the installed versions of PyTorch and Torchvision, and warns if they're incompatible according
to the provided compatibility table based on https://github.com/pytorch/vision#installation. The
compatibility table is a dictionary where the keys are PyTorch versions and the values are lists of compatible
to the provided compatibility table based on:
https://github.com/pytorch/vision#installation.
The compatibility table is a dictionary where the keys are PyTorch versions and the values are lists of compatible
Torchvision versions.
"""
@ -527,9 +529,9 @@ def collect_system_info():
def check_amp(model):
"""
This function checks the PyTorch Automatic Mixed Precision (AMP) functionality of a YOLOv8 model.
If the checks fail, it means there are anomalies with AMP on the system that may cause NaN losses or zero-mAP
results, so AMP will be disabled during training.
This function checks the PyTorch Automatic Mixed Precision (AMP) functionality of a YOLOv8 model. If the checks
fail, it means there are anomalies with AMP on the system that may cause NaN losses or zero-mAP results, so AMP will
be disabled during training.
Args:
model (nn.Module): A YOLOv8 model instance.
@ -606,7 +608,8 @@ def print_args(args: Optional[dict] = None, show_file=True, show_func=False):
def cuda_device_count() -> int:
"""Get the number of NVIDIA GPUs available in the environment.
"""
Get the number of NVIDIA GPUs available in the environment.
Returns:
(int): The number of NVIDIA GPUs available.
@ -626,7 +629,8 @@ def cuda_device_count() -> int:
def cuda_is_available() -> bool:
"""Check if CUDA is available in the environment.
"""
Check if CUDA is available in the environment.
Returns:
(bool): True if one or more NVIDIA GPUs are available, False otherwise.
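A quick usage sketch of these two helpers, assuming the checks module path is unchanged:

from ultralytics.utils.checks import cuda_device_count, cuda_is_available

if cuda_is_available():
    print(f'{cuda_device_count()} NVIDIA GPU(s) detected')  # number of NVIDIA GPUs visible in the environment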

@ -13,7 +13,8 @@ from .torch_utils import TORCH_1_9
def find_free_network_port() -> int:
"""Finds a free port on localhost.
"""
Finds a free port on localhost.
It is useful in single-node training when we don't want to connect to a real main node but have to set the
`MASTER_PORT` environment variable.
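The helper is essentially the standard bind-to-port-zero idiom; a minimal sketch:

import socket

def find_free_network_port() -> int:
    # bind to port 0 and let the OS pick an unused port to use for MASTER_PORT
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(('127.0.0.1', 0))
        return s.getsockname()[1]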

@ -69,8 +69,8 @@ def delete_dsstore(path, files_to_delete=('.DS_Store', '__MACOSX')):
def zip_directory(directory, compress=True, exclude=('.DS_Store', '__MACOSX'), progress=True):
"""
Zips the contents of a directory, excluding files containing strings in the exclude list.
The resulting zip file is named after the directory and placed alongside it.
Zips the contents of a directory, excluding files containing strings in the exclude list. The resulting zip file is
named after the directory and placed alongside it.
Args:
directory (str | Path): The path to the directory to be zipped.
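A typical call (the path is a placeholder):

from ultralytics.utils.downloads import zip_directory

zip_directory('runs/detect/train')  # writes 'runs/detect/train.zip' alongside the directory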
@ -341,7 +341,11 @@ def get_github_assets(repo='ultralytics/assets', version='latest', retry=False):
def attempt_download_asset(file, repo='ultralytics/assets', release='v0.0.0'):
"""Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc."""
"""
Attempt file download from GitHub release assets if not found locally.
release = 'latest', 'v6.2', etc.
"""
from ultralytics.utils import SETTINGS # scoped for circular import
# YOLOv3/5u updates
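Usage sketch, assuming the default Ultralytics assets repository:

from ultralytics.utils.downloads import attempt_download_asset

weights = attempt_download_asset('yolov8n.pt')  # fetched from the GitHub release assets if not found locally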

@ -30,9 +30,9 @@ class WorkingDirectory(contextlib.ContextDecorator):
@contextmanager
def spaces_in_path(path):
"""
Context manager to handle paths with spaces in their names.
If a path contains spaces, it replaces them with underscores, copies the file/directory to the new path,
executes the context code block, then copies the file/directory back to its original location.
Context manager to handle paths with spaces in their names. If a path contains spaces, it replaces them with
underscores, copies the file/directory to the new path, executes the context code block, then copies the
file/directory back to its original location.
Args:
path (str | Path): The original path.
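A usage sketch (the path and the work done inside the block are hypothetical):

from ultralytics.utils.files import spaces_in_path

with spaces_in_path('datasets/my dataset') as safe_path:
    train_on(safe_path)  # hypothetical helper; the space-free copy is copied back on exit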

@ -32,9 +32,14 @@ __all__ = 'Bboxes', # tuple or list
class Bboxes:
"""Bounding Boxes class. Only numpy variables are supported."""
"""
Bounding Boxes class.
Only numpy variables are supported.
"""
def __init__(self, bboxes, format='xyxy') -> None:
"""Initializes the Bboxes class with bounding box data in a specified format."""
assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}'
bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes
assert bboxes.ndim == 2
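A small sketch of how the class is used (values are arbitrary):

import numpy as np
from ultralytics.utils.instance import Bboxes

boxes = Bboxes(np.array([[10, 20, 110, 220]], dtype=np.float32), format='xyxy')
print(boxes.areas())   # [20000.], computed from the stored numpy array
boxes.convert('xywh')  # in-place format conversion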
@ -194,7 +199,7 @@ class Instances:
return self._bboxes.areas()
def scale(self, scale_w, scale_h, bbox_only=False):
"""this might be similar with denormalize func but without normalized sign."""
"""This might be similar with denormalize func but without normalized sign."""
self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h))
if bbox_only:
return
@ -307,7 +312,11 @@ class Instances:
self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)
def remove_zero_area_boxes(self):
"""Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height. This removes them."""
"""
Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height.
This removes them.
"""
good = self.bbox_areas > 0
if not all(good):
self._bboxes = self._bboxes[good]

@ -13,7 +13,11 @@ from .tal import bbox2dist
class VarifocalLoss(nn.Module):
"""Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367."""
"""
Varifocal loss by Zhang et al.
https://arxiv.org/abs/2008.13367.
"""
def __init__(self):
"""Initialize the VarifocalLoss class."""
@ -33,6 +37,7 @@ class FocalLoss(nn.Module):
"""Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""
def __init__(self, ):
"""Initializer for FocalLoss class with no parameters."""
super().__init__()
@staticmethod
@ -93,6 +98,7 @@ class KeypointLoss(nn.Module):
"""Criterion class for computing training losses."""
def __init__(self, sigmas) -> None:
"""Initialize the KeypointLoss class."""
super().__init__()
self.sigmas = sigmas

@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Model validation metrics
"""
"""Model validation metrics."""
import math
import warnings
@ -195,7 +193,7 @@ class ConfusionMatrix:
def process_cls_preds(self, preds, targets):
"""
Update confusion matrix for classification task
Update confusion matrix for classification task.
Args:
preds (Array[N, min(nc,5)]): Predicted class labels.
@ -308,9 +306,7 @@ class ConfusionMatrix:
on_plot(plot_fname)
def print(self):
"""
Print the confusion matrix to the console.
"""
"""Print the confusion matrix to the console."""
for i in range(self.nc + 1):
LOGGER.info(' '.join(map(str, self.matrix[i])))
@ -440,7 +436,6 @@ def ap_per_class(tp,
f1 (np.ndarray): F1-score values at each confidence threshold.
ap (np.ndarray): Average precision for each class at different IoU thresholds.
unique_classes (np.ndarray): An array of unique classes that have data.
"""
# Sort by objectness
@ -498,32 +493,33 @@ def ap_per_class(tp,
class Metric(SimpleClass):
"""
Class for computing evaluation metrics for YOLOv8 model.
Attributes:
p (list): Precision for each class. Shape: (nc,).
r (list): Recall for each class. Shape: (nc,).
f1 (list): F1 score for each class. Shape: (nc,).
all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
ap_class_index (list): Index of class for each AP score. Shape: (nc,).
nc (int): Number of classes.
Methods:
ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
mp(): Mean precision of all classes. Returns: Float.
mr(): Mean recall of all classes. Returns: Float.
map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float.
map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float.
map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float.
mean_results(): Mean of results, returns mp, mr, map50, map.
class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i].
maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,).
fitness(): Model fitness as a weighted combination of metrics. Returns: Float.
update(results): Update metric attributes with new evaluation results.
"""
Class for computing evaluation metrics for YOLOv8 model.
Attributes:
p (list): Precision for each class. Shape: (nc,).
r (list): Recall for each class. Shape: (nc,).
f1 (list): F1 score for each class. Shape: (nc,).
all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10).
ap_class_index (list): Index of class for each AP score. Shape: (nc,).
nc (int): Number of classes.
Methods:
ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or [].
mp(): Mean precision of all classes. Returns: Float.
mr(): Mean recall of all classes. Returns: Float.
map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float.
map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float.
map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float.
mean_results(): Mean of results, returns mp, mr, map50, map.
class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i].
maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,).
fitness(): Model fitness as a weighted combination of metrics. Returns: Float.
update(results): Update metric attributes with new evaluation results.
"""
def __init__(self) -> None:
"""Initializes a Metric instance for computing evaluation metrics for the YOLOv8 model."""
self.p = [] # (nc, )
self.r = [] # (nc, )
self.f1 = [] # (nc, )
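In practice these values are consumed through the task metrics objects, e.g. (model and dataset names are examples):

from ultralytics import YOLO

metrics = YOLO('yolov8n.pt').val(data='coco128.yaml')  # returns a DetMetrics instance
print(metrics.box.map50, metrics.box.map)              # mean AP@0.5 and AP@0.5:0.95 from the Metric above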
@ -606,12 +602,12 @@ class Metric(SimpleClass):
return [self.mp, self.mr, self.map50, self.map]
def class_result(self, i):
"""class-aware result, return p[i], r[i], ap50[i], ap[i]."""
"""Class-aware result, return p[i], r[i], ap50[i], ap[i]."""
return self.p[i], self.r[i], self.ap50[i], self.ap[i]
@property
def maps(self):
"""mAP of each class."""
"""MAP of each class."""
maps = np.zeros(self.nc) + self.map
for i, c in enumerate(self.ap_class_index):
maps[c] = self.ap[i]
@ -672,6 +668,7 @@ class DetMetrics(SimpleClass):
"""
def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
"""Initialize a DetMetrics instance with a save directory, plot flag, callback function, and class names."""
self.save_dir = save_dir
self.plot = plot
self.on_plot = on_plot
@ -756,6 +753,7 @@ class SegmentMetrics(SimpleClass):
"""
def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
"""Initialize a SegmentMetrics instance with a save directory, plot flag, callback function, and class names."""
self.save_dir = save_dir
self.plot = plot
self.on_plot = on_plot
@ -865,6 +863,7 @@ class PoseMetrics(SegmentMetrics):
"""
def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None:
"""Initialize the PoseMetrics class with directory path, class names, and plotting options."""
super().__init__(save_dir, plot, names)
self.save_dir = save_dir
self.plot = plot
@ -954,6 +953,7 @@ class ClassifyMetrics(SimpleClass):
"""
def __init__(self) -> None:
"""Initialize a ClassifyMetrics instance."""
self.top1 = 0
self.top5 = 0
self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0}

@ -50,6 +50,7 @@ class Profile(contextlib.ContextDecorator):
self.t += self.dt # accumulate dt
def __str__(self):
"""Returns a human-readable string representing the accumulated elapsed time in the profiler."""
return f'Elapsed time is {self.t} s'
def time(self):
@ -303,7 +304,7 @@ def clip_coords(coords, shape):
def scale_image(masks, im0_shape, ratio_pad=None):
"""
Takes a mask, and resizes it to the original image size
Takes a mask, and resizes it to the original image size.
Args:
masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
@ -403,8 +404,8 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
"""
Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format.
x, y, width and height are normalized to image dimensions
Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y,
width and height are normalized to image dimensions.
Args:
x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
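The conversion itself is simple; an illustrative numpy-only version:

import numpy as np

def xyxy2xywhn_sketch(x, w=640, h=640):
    # centre x/y and width/height, each normalized by the image dimensions
    y = np.empty_like(x, dtype=float)
    y[..., 0] = (x[..., 0] + x[..., 2]) / 2 / w
    y[..., 1] = (x[..., 1] + x[..., 3]) / 2 / h
    y[..., 2] = (x[..., 2] - x[..., 0]) / w
    y[..., 3] = (x[..., 3] - x[..., 1]) / h
    return y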
@ -445,7 +446,7 @@ def xywh2ltwh(x):
def xyxy2ltwh(x):
"""
Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right
Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right.
Args:
x (np.ndarray | torch.Tensor): The input tensor with the bounding boxes coordinates in the xyxy format
@ -461,7 +462,7 @@ def xyxy2ltwh(x):
def ltwh2xywh(x):
"""
Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center
Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.
Args:
x (torch.Tensor): the input tensor
@ -544,7 +545,7 @@ def xywhr2xyxyxyxy(center):
def ltwh2xyxy(x):
"""
It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.
Args:
x (np.ndarray | torch.Tensor): the input image
@ -616,8 +617,8 @@ def crop_mask(masks, boxes):
def process_mask_upsample(protos, masks_in, bboxes, shape):
"""
Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher
quality but is slower.
Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality
but is slower.
Args:
protos (torch.Tensor): [mask_dim, mask_h, mask_w]
@ -713,7 +714,7 @@ def scale_masks(masks, shape, padding=True):
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False, padding=True):
"""
Rescale segment coordinates (xy) from img1_shape to img0_shape
Rescale segment coordinates (xy) from img1_shape to img0_shape.
Args:
img1_shape (tuple): The shape of the image that the coords are from.

@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Monkey patches to update/extend functionality of existing functions
"""
"""Monkey patches to update/extend functionality of existing functions."""
from pathlib import Path
@ -14,7 +12,8 @@ _imshow = cv2.imshow # copy to avoid recursion errors
def imread(filename: str, flags: int = cv2.IMREAD_COLOR):
"""Read an image from a file.
"""
Read an image from a file.
Args:
filename (str): Path to the file to read.
@ -27,7 +26,8 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR):
def imwrite(filename: str, img: np.ndarray, params=None):
"""Write an image to a file.
"""
Write an image to a file.
Args:
filename (str): Path to the file to write.
@ -45,7 +45,8 @@ def imwrite(filename: str, img: np.ndarray, params=None):
def imshow(winname: str, mat: np.ndarray):
"""Displays an image in the specified window.
"""
Displays an image in the specified window.
Args:
winname (str): Name of the window.
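These patched functions are drop-in replacements for the cv2 originals; a short usage sketch (paths are placeholders):

from ultralytics.utils.patches import imread, imwrite

im = imread('images/例.jpg')  # handles non-ASCII paths that plain cv2.imread may not
if im is not None:
    imwrite('images/copy.jpg', im)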
@ -59,7 +60,8 @@ _torch_save = torch.save # copy to avoid recursion errors
def torch_save(*args, **kwargs):
"""Use dill (if exists) to serialize the lambda functions where pickle does not do this.
"""
Use dill (if exists) to serialize the lambda functions where pickle does not do this.
Args:
*args (tuple): Positional arguments to pass to torch.save.

@ -316,7 +316,8 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(''), on_plot=None):
def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
"""Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop.
"""
Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop.
This function takes a bounding box and an image, and then saves a cropped portion of the image according
to the bounding box. Optionally, the crop can be squared, and the function allows for gain and padding
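A typical call, with xyxy and im assumed to be one detection box and the source image:

from ultralytics.utils.plotting import save_one_box

crop = save_one_box(xyxy, im, file='crop.jpg', square=True)  # saves and returns the cropped region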

@ -205,7 +205,11 @@ def fuse_deconv_and_bn(deconv, bn):
def model_info(model, detailed=False, verbose=True, imgsz=640):
"""Model information. imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320]."""
"""
Model information.
imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320].
"""
if not verbose:
return
n_p = get_num_params(model) # number of parameters
@ -517,13 +521,11 @@ def profile(input, ops, n=10, device=None):
class EarlyStopping:
"""
Early stopping class that stops training when a specified number of epochs have passed without improvement.
"""
"""Early stopping class that stops training when a specified number of epochs have passed without improvement."""
def __init__(self, patience=50):
"""
Initialize early stopping object
Initialize early stopping object.
Args:
patience (int, optional): Number of epochs to wait after fitness stops improving before stopping.
@ -535,7 +537,7 @@ class EarlyStopping:
def __call__(self, epoch, fitness):
"""
Check whether to stop training
Check whether to stop training.
Args:
epoch (int): Current epoch of training
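A usage sketch of the stopper inside a training loop (validate() is a placeholder for the per-epoch fitness computation):

stopper = EarlyStopping(patience=50)
for epoch in range(300):
    fitness = validate()         # hypothetical fitness value for this epoch
    if stopper(epoch, fitness):  # True once `patience` epochs pass without improvement
        break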

@ -7,7 +7,8 @@ import numpy as np
class TritonRemoteModel:
"""Client for interacting with a remote Triton Inference Server model.
"""
Client for interacting with a remote Triton Inference Server model.
Attributes:
endpoint (str): The name of the model on the Triton server.
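A usage sketch, assuming a Triton server is running locally and serving a model named 'yolov8' (both are placeholders; tritonclient must be installed):

import numpy as np
from ultralytics.utils.triton import TritonRemoteModel

model = TritonRemoteModel('http://localhost:8000/yolov8')
outputs = model(np.zeros((1, 3, 640, 640), dtype=np.float32))  # returns a list of numpy arrays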
