From 7517667a33b08a1c2f7cca0dd3e2fa29f335e9f3 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 9 Oct 2023 02:25:22 +0200 Subject: [PATCH] Add `docformatter` to pre-commit (#5279) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com> --- .pre-commit-config.yaml | 5 + docs/build_reference.py | 44 ++- docs/reference/models/utils/ops.md | 4 - examples/YOLOv8-ONNXRuntime/main.py | 9 +- examples/YOLOv8-OpenCV-ONNX-Python/main.py | 41 ++- .../yolov8_region_counter.py | 8 +- setup.cfg | 9 + setup.py | 8 +- tests/conftest.py | 9 +- tests/test_cli.py | 11 +- tests/test_cuda.py | 10 + tests/test_engine.py | 5 + tests/test_python.py | 72 ++++- ultralytics/cfg/__init__.py | 9 +- ultralytics/data/augment.py | 263 ++++++++++++++++-- ultralytics/data/base.py | 3 +- ultralytics/data/build.py | 12 +- ultralytics/data/converter.py | 7 +- ultralytics/data/dataset.py | 18 +- ultralytics/data/loaders.py | 118 +++++++- ultralytics/data/utils.py | 6 +- ultralytics/engine/exporter.py | 4 +- ultralytics/engine/model.py | 12 +- ultralytics/engine/predictor.py | 16 +- ultralytics/engine/results.py | 26 +- ultralytics/engine/trainer.py | 50 ++-- ultralytics/engine/tuner.py | 40 +-- ultralytics/engine/validator.py | 7 +- ultralytics/hub/__init__.py | 4 +- ultralytics/hub/auth.py | 21 +- ultralytics/hub/session.py | 1 + ultralytics/hub/utils.py | 4 +- ultralytics/models/fastsam/model.py | 3 +- ultralytics/models/fastsam/predict.py | 2 + ultralytics/models/fastsam/prompt.py | 16 +- ultralytics/models/nas/model.py | 4 +- ultralytics/models/rtdetr/model.py | 10 +- ultralytics/models/rtdetr/predict.py | 3 +- ultralytics/models/rtdetr/train.py | 3 +- ultralytics/models/rtdetr/val.py | 1 + ultralytics/models/sam/amg.py | 18 +- ultralytics/models/sam/model.py | 11 +- ultralytics/models/sam/modules/decoders.py | 6 +- ultralytics/models/sam/modules/encoders.py | 27 +- .../models/sam/modules/tiny_encoder.py | 40 ++- ultralytics/models/sam/modules/transformer.py | 6 +- ultralytics/models/sam/predict.py | 12 +- ultralytics/models/utils/loss.py | 43 ++- ultralytics/models/utils/ops.py | 32 +-- ultralytics/models/yolo/classify/predict.py | 1 + ultralytics/models/yolo/classify/train.py | 6 +- ultralytics/models/yolo/classify/val.py | 1 + ultralytics/models/yolo/detect/train.py | 7 +- ultralytics/models/yolo/model.py | 6 +- ultralytics/models/yolo/pose/predict.py | 1 + ultralytics/models/yolo/segment/predict.py | 2 + ultralytics/models/yolo/segment/val.py | 2 +- ultralytics/nn/autobackend.py | 77 ++--- ultralytics/nn/modules/__init__.py | 22 +- ultralytics/nn/modules/block.py | 16 +- ultralytics/nn/modules/conv.py | 23 +- ultralytics/nn/modules/head.py | 9 +- ultralytics/nn/modules/transformer.py | 62 ++++- ultralytics/nn/modules/utils.py | 10 +- ultralytics/nn/tasks.py | 82 ++++-- ultralytics/trackers/bot_sort.py | 59 +++- ultralytics/trackers/byte_tracker.py | 86 +++++- ultralytics/trackers/track.py | 1 - ultralytics/trackers/utils/gmc.py | 23 ++ ultralytics/trackers/utils/kalman_filter.py | 12 +- ultralytics/utils/__init__.py | 49 ++-- ultralytics/utils/autobatch.py | 4 +- ultralytics/utils/benchmarks.py | 15 +- ultralytics/utils/callbacks/base.py | 4 +- ultralytics/utils/callbacks/comet.py | 14 +- ultralytics/utils/callbacks/dvc.py | 9 + ultralytics/utils/callbacks/neptune.py | 11 +- ultralytics/utils/callbacks/wb.py | 1 + ultralytics/utils/checks.py | 22 +- ultralytics/utils/dist.py | 3 +- ultralytics/utils/downloads.py | 
10 +- ultralytics/utils/files.py | 6 +- ultralytics/utils/instance.py | 15 +- ultralytics/utils/loss.py | 8 +- ultralytics/utils/metrics.py | 68 ++--- ultralytics/utils/ops.py | 19 +- ultralytics/utils/patches.py | 16 +- ultralytics/utils/plotting.py | 3 +- ultralytics/utils/torch_utils.py | 14 +- ultralytics/utils/triton.py | 3 +- 90 files changed, 1397 insertions(+), 498 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e2b477dade..1dc23a87c8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -62,6 +62,11 @@ repos: args: - --ignore-words-list=crate,nd,strack,dota,ane,segway,fo + - repo: https://github.com/PyCQA/docformatter + rev: v1.7.5 + hooks: + - id: docformatter + # - repo: https://github.com/asottile/yesqa # rev: v1.4.0 # hooks: diff --git a/docs/build_reference.py b/docs/build_reference.py index 3641b132e8..d880923457 100644 --- a/docs/build_reference.py +++ b/docs/build_reference.py @@ -18,7 +18,15 @@ CODE_DIR = ROOT REFERENCE_DIR = ROOT.parent / 'docs/reference' -def extract_classes_and_functions(filepath): +def extract_classes_and_functions(filepath: Path): + """Extracts class and function names from a given Python file. + + Args: + filepath (Path): The path to the Python file. + + Returns: + (tuple): A tuple containing lists of class and function names. + """ with open(filepath, 'r') as file: content = file.read() @@ -31,7 +39,15 @@ def extract_classes_and_functions(filepath): return classes, functions -def create_markdown(py_filepath, module_path, classes, functions): +def create_markdown(py_filepath: Path, module_path: str, classes: list, functions: list): + """Creates a Markdown file containing the API reference for the given Python module. + + Args: + py_filepath (Path): The path to the Python file. + module_path (str): The import path for the Python module. + classes (list): A list of class names within the module. + functions (list): A list of function names within the module. + """ md_filepath = py_filepath.with_suffix('.md') # Read existing content and keep header content between first two --- @@ -64,17 +80,35 @@ def create_markdown(py_filepath, module_path, classes, functions): def nested_dict(): + """Creates and returns a nested defaultdict. + + Returns: + (defaultdict): A nested defaultdict object. + """ return defaultdict(nested_dict) -def sort_nested_dict(d): +def sort_nested_dict(d: dict): + """Sorts a nested dictionary recursively. + + Args: + d (dict): The dictionary to sort. + + Returns: + (dict): The sorted dictionary. + """ return { key: sort_nested_dict(value) if isinstance(value, dict) else value for key, value in sorted(d.items()) } -def create_nav_menu_yaml(nav_items): +def create_nav_menu_yaml(nav_items: list): + """Creates a YAML file for the navigation menu based on the provided list of items. + + Args: + nav_items (list): A list of relative file paths to Markdown files for the navigation menu. 
+ """ nav_tree = nested_dict() for item_str in nav_items: @@ -90,6 +124,7 @@ def create_nav_menu_yaml(nav_items): nav_tree_sorted = sort_nested_dict(nav_tree) def _dict_to_yaml(d, level=0): + """Converts a nested dictionary to a YAML-formatted string with indentation.""" yaml_str = '' indent = ' ' * level for k, v in d.items(): @@ -105,6 +140,7 @@ def create_nav_menu_yaml(nav_items): def main(): + """Main function to extract class and function names, create Markdown files, and generate a YAML navigation menu.""" nav_items = [] for root, _, files in os.walk(CODE_DIR): for file in files: diff --git a/docs/reference/models/utils/ops.md b/docs/reference/models/utils/ops.md index 9538ee483e..d1f933586d 100644 --- a/docs/reference/models/utils/ops.md +++ b/docs/reference/models/utils/ops.md @@ -16,7 +16,3 @@ keywords: Ultralytics, YOLO, HungarianMatcher, inverse_sigmoid, detection models --- ## ::: ultralytics.models.utils.ops.get_cdn_group

- ---- -## ::: ultralytics.models.utils.ops.inverse_sigmoid -

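Note on the pattern in the hunks that follow: the docformatter hook added to .pre-commit-config.yaml above rewrites existing docstrings according to the [docformatter] settings introduced in setup.cfg further down (wrap summaries and descriptions at 120 columns, newline before the summary, closing quotes on their own line). A minimal before/after sketch of that rewrite — the function below is hypothetical, not taken from this patch:

# Before the hook runs (summary flowed against the opening quotes):
def extract_names(filepath):
    """Extracts class and function names from a Python file.
    Returns a tuple of (classes, functions)."""

# After `pre-commit run docformatter --all-files` with the settings above:
def extract_names(filepath):
    """
    Extracts class and function names from a Python file.

    Returns a tuple of (classes, functions).
    """

One-line docstrings stay on a single line (make-summary-multi-line = false), so the many new one-line docstrings in this patch appear to have been added by hand; docformatter only rewraps docstrings that already exist, it does not create them.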
diff --git a/examples/YOLOv8-ONNXRuntime/main.py b/examples/YOLOv8-ONNXRuntime/main.py index 8d03182b36..ec7687138f 100644 --- a/examples/YOLOv8-ONNXRuntime/main.py +++ b/examples/YOLOv8-ONNXRuntime/main.py @@ -9,11 +9,12 @@ from ultralytics.utils import ASSETS, yaml_load from ultralytics.utils.checks import check_requirements, check_yaml -class Yolov8: +class YOLOv8: + """YOLOv8 object detection model class for handling inference and visualization.""" def __init__(self, onnx_model, input_image, confidence_thres, iou_thres): """ - Initializes an instance of the Yolov8 class. + Initializes an instance of the YOLOv8 class. Args: onnx_model: Path to the ONNX model. @@ -213,8 +214,8 @@ if __name__ == '__main__': # Check the requirements and select the appropriate backend (CPU or GPU) check_requirements('onnxruntime-gpu' if torch.cuda.is_available() else 'onnxruntime') - # Create an instance of the Yolov8 class with the specified arguments - detection = Yolov8(args.model, args.img, args.conf_thres, args.iou_thres) + # Create an instance of the YOLOv8 class with the specified arguments + detection = YOLOv8(args.model, args.img, args.conf_thres, args.iou_thres) # Perform object detection and obtain the output image output_image = detection.main() diff --git a/examples/YOLOv8-OpenCV-ONNX-Python/main.py b/examples/YOLOv8-OpenCV-ONNX-Python/main.py index 76802f0a0a..78b0b08ef0 100644 --- a/examples/YOLOv8-OpenCV-ONNX-Python/main.py +++ b/examples/YOLOv8-OpenCV-ONNX-Python/main.py @@ -7,11 +7,22 @@ from ultralytics.utils import ASSETS, yaml_load from ultralytics.utils.checks import check_yaml CLASSES = yaml_load(check_yaml('coco128.yaml'))['names'] - colors = np.random.uniform(0, 255, size=(len(CLASSES), 3)) def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h): + """ + Draws bounding boxes on the input image based on the provided arguments. + + Args: + img (numpy.ndarray): The input image to draw the bounding box on. + class_id (int): Class ID of the detected object. + confidence (float): Confidence score of the detected object. + x (int): X-coordinate of the top-left corner of the bounding box. + y (int): Y-coordinate of the top-left corner of the bounding box. + x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box. + y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box. + """ label = f'{CLASSES[class_id]} ({confidence:.2f})' color = colors[class_id] cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2) @@ -19,18 +30,39 @@ def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h): def main(onnx_model, input_image): + """ + Main function to load ONNX model, perform inference, draw bounding boxes, and display the output image. + + Args: + onnx_model (str): Path to the ONNX model. + input_image (str): Path to the input image. + + Returns: + list: List of dictionaries containing detection information such as class_id, class_name, confidence, etc. 
+    """
+    # Load the ONNX model
     model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model)
+
+    # Read the input image
     original_image: np.ndarray = cv2.imread(input_image)
     [height, width, _] = original_image.shape
+
+    # Prepare a square image for inference
     length = max((height, width))
     image = np.zeros((length, length, 3), np.uint8)
     image[0:height, 0:width] = original_image
+
+    # Calculate scale factor
     scale = length / 640
 
+    # Preprocess the image and prepare blob for model
     blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True)
     model.setInput(blob)
+
+    # Perform inference
     outputs = model.forward()
 
+    # Prepare output array
     outputs = np.array([cv2.transpose(outputs[0])])
     rows = outputs.shape[1]
@@ -38,6 +70,7 @@ def main(onnx_model, input_image):
     scores = []
     class_ids = []
 
+    # Iterate through output to collect bounding boxes, confidence scores, and class IDs
     for i in range(rows):
         classes_scores = outputs[0][i][4:]
         (minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores)
@@ -49,9 +82,12 @@ def main(onnx_model, input_image):
             scores.append(maxScore)
             class_ids.append(maxClassIndex)
 
+    # Apply NMS (Non-maximum suppression)
     result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5)
 
     detections = []
+
+    # Iterate through NMS results to draw bounding boxes and labels
     for i in range(len(result_boxes)):
         index = result_boxes[i]
         box = boxes[index]
@@ -65,6 +101,7 @@ def main(onnx_model, input_image):
         draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale),
                           round(box[1] * scale), round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale))
 
+    # Display the image with bounding boxes
     cv2.imshow('image', original_image)
     cv2.waitKey(0)
     cv2.destroyAllWindows()
@@ -74,7 +111,7 @@ def main(onnx_model, input_image):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--model', default='yolov8n.onnx', help='Input your onnx model.')
+    parser.add_argument('--model', default='yolov8n.onnx', help='Input your ONNX model.')
     parser.add_argument('--img', default=str(ASSETS / 'bus.jpg'), help='Path to input image.')
     args = parser.parse_args()
     main(args.model, args.img)
diff --git a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
index bdc4013201..dd0e476f97 100644
--- a/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
+++ b/examples/YOLOv8-Region-Counter/yolov8_region_counter.py
@@ -33,10 +33,6 @@ counting_regions = [
     },
 ]
 
-def is_inside_polygon(point, polygon):
-    return polygon.contains(Point(point))
-
-
 def mouse_callback(event, x, y, flags, param):
-    """Mouse call back event."""
+    """Mouse callback event."""
     global current_region
@@ -44,7 +40,7 @@ def mouse_callback(event, x, y, flags, param):
     # Mouse left button down event
     if event == cv2.EVENT_LBUTTONDOWN:
         for region in counting_regions:
-            if is_inside_polygon((x, y), region['polygon']):
+            if region['polygon'].contains(Point((x, y))):
                 current_region = region
                 current_region['dragging'] = True
                 current_region['offset_x'] = x
@@ -150,7 +146,7 @@ def run(
 
             # Check if detection inside region
             for region in counting_regions:
-                if is_inside_polygon((x, y), region['polygon']):
+                if region['polygon'].contains(Point((x, y))):
                     region['counts'] += 1
 
             # Draw regions (Polygons/Rectangles)
diff --git a/setup.cfg b/setup.cfg
index a7d16ab9ba..ff36444927 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -60,3 +60,12 @@ SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET = True
 SPLIT_BEFORE_CLOSING_BRACKET = False
SPLIT_BEFORE_FIRST_ARGUMENT = False # EACH_DICT_ENTRY_ON_SEPARATE_LINE = False + +[docformatter] +wrap-summaries = 120 +wrap-descriptions = 120 +in-place = true +make-summary-multi-line = false +pre-summary-newline = true +force-wrap = false +close-quotes-on-newline = true diff --git a/setup.py b/setup.py index 1132ffa14f..8fb107c854 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,12 @@ README = (PARENT / 'README.md').read_text(encoding='utf-8') def get_version(): + """ + Retrieve the version number from the 'ultralytics/__init__.py' file. + + Returns: + (str): The version number extracted from the '__version__' attribute in the 'ultralytics/__init__.py' file. + """ file = PARENT / 'ultralytics/__init__.py' return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', file.read_text(encoding='utf-8'), re.M)[1] @@ -24,7 +30,7 @@ def parse_requirements(file_path: Path): file_path (str | Path): Path to the requirements.txt file. Returns: - List[str]: List of parsed requirements. + (List[str]): List of parsed requirements. """ requirements = [] diff --git a/tests/conftest.py b/tests/conftest.py index ac90931077..59955bd1ce 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,7 +9,8 @@ TMP = Path(__file__).resolve().parent / 'tmp' # temp directory for test files def pytest_addoption(parser): - """Add custom command-line options to pytest. + """ + Add custom command-line options to pytest. Args: parser (pytest.config.Parser): The pytest parser object. @@ -18,7 +19,8 @@ def pytest_addoption(parser): def pytest_configure(config): - """Register custom markers to avoid pytest warnings. + """ + Register custom markers to avoid pytest warnings. Args: config (pytest.config.Config): The pytest config object. @@ -27,7 +29,8 @@ def pytest_configure(config): def pytest_runtest_setup(item): - """Setup hook to skip tests marked as slow if the --slow option is not provided. + """ + Setup hook to skip tests marked as slow if the --slow option is not provided. Args: item (pytest.Item): The test item object. 
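The conftest.py hunks above only add docstrings, but they document a common pytest pattern worth spelling out. A self-contained sketch of how these three hooks typically fit together — the bodies below are hypothetical (only the docstrings appear in this patch), though the `--slow` option and slow-marker behavior match what the docstrings describe:

# conftest.py (illustrative sketch, assumed bodies)
import pytest


def pytest_addoption(parser):
    """Add custom command-line options to pytest."""
    parser.addoption('--slow', action='store_true', default=False, help='Run slow tests')


def pytest_configure(config):
    """Register custom markers to avoid pytest warnings."""
    config.addinivalue_line('markers', 'slow: mark test as slow to run')


def pytest_runtest_setup(item):
    """Setup hook to skip tests marked as slow if the --slow option is not provided."""
    if 'slow' in item.keywords and not item.config.getoption('--slow'):
        pytest.skip('set --slow option to run slow tests')

With hooks like these in place, `pytest --slow` runs the slow-marked tests that are otherwise skipped at setup time.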
diff --git a/tests/test_cli.py b/tests/test_cli.py
index a935aa0eea..788651b483 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -22,11 +22,12 @@ EXPORT_ARGS = [
 
 
 def run(cmd):
-    # Run a subprocess command with check=True
+    """Execute a shell command using subprocess."""
     subprocess.run(cmd.split(), check=True)
 
 
 def test_special_modes():
+    """Test various special command modes of YOLO."""
     run('yolo help')
     run('yolo checks')
     run('yolo version')
@@ -36,31 +37,37 @@
 
 @pytest.mark.parametrize('task,model,data', TASK_ARGS)
 def test_train(task, model, data):
+    """Test YOLO training for a given task, model, and data."""
     run(f'yolo train {task} model={model}.yaml data={data} imgsz=32 epochs=1 cache=disk')
 
 
 @pytest.mark.parametrize('task,model,data', TASK_ARGS)
 def test_val(task, model, data):
+    """Test YOLO validation for a given task, model, and data."""
     run(f'yolo val {task} model={WEIGHTS_DIR / model}.pt data={data} imgsz=32 save_txt save_json')
 
 
 @pytest.mark.parametrize('task,model,data', TASK_ARGS)
 def test_predict(task, model, data):
+    """Test YOLO prediction on sample assets for a given task and model."""
     run(f'yolo predict model={WEIGHTS_DIR / model}.pt source={ASSETS} imgsz=32 save save_crop save_txt')
 
 
 @pytest.mark.parametrize('model,format', EXPORT_ARGS)
 def test_export(model, format):
+    """Test exporting a YOLO model to different formats."""
     run(f'yolo export model={WEIGHTS_DIR / model}.pt format={format} imgsz=32')
 
 
 def test_rtdetr(task='detect', model='yolov8n-rtdetr.yaml', data='coco8.yaml'):
+    """Test the RTDETR functionality with the Ultralytics framework."""
     # Warning: MUST use imgsz=640
     run(f'yolo train {task} model={model} data={data} --imgsz= 640 epochs =1, cache = disk')  # add comma, spaces to args
     run(f"yolo predict {task} model={model} source={ASSETS / 'bus.jpg'} imgsz=640 save save_crop save_txt")
 
 
 def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8-seg.yaml'):
+    """Test FastSAM segmentation functionality within Ultralytics."""
     source = ASSETS / 'bus.jpg'
 
     run(f'yolo segment val {task} model={model} data={data} imgsz=32')
@@ -97,6 +104,7 @@ def test_fastsam(task='segment', model=WEIGHTS_DIR / 'FastSAM-s.pt', data='coco8
 
 
 def test_mobilesam():
+    """Test MobileSAM segmentation functionality using Ultralytics."""
     from ultralytics import SAM
 
     # Load the model
@@ -121,5 +129,6 @@
 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
 @pytest.mark.skipif(CUDA_DEVICE_COUNT < 2, reason='DDP is not available')
 def test_train_gpu(task, model, data):
+    """Test YOLO training on GPU(s) for various tasks and models."""
     run(f'yolo train {task} model={model}.yaml data={data} imgsz=32 epochs=1 device=0')  # single GPU
     run(f'yolo train {task} model={model}.pt data={data} imgsz=32 epochs=1 device=0,1')  # multi GPU
diff --git a/tests/test_cuda.py b/tests/test_cuda.py
index 92ecbe3d13..15c2259b66 100644
--- a/tests/test_cuda.py
+++ b/tests/test_cuda.py
@@ -1,4 +1,5 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
+
 import contextlib
 
 import pytest
@@ -17,18 +18,21 @@ BUS = ASSETS / 'bus.jpg'
 
 
 def test_checks():
+    """Validate CUDA settings against torch CUDA functions."""
     assert torch.cuda.is_available() == CUDA_IS_AVAILABLE
     assert torch.cuda.device_count() == CUDA_DEVICE_COUNT
 
 
 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available')
 def test_train():
+    """Test model training on a minimal dataset."""
     device = 0 if CUDA_DEVICE_COUNT == 1 else [0, 1]
     YOLO(MODEL).train(data=DATA, imgsz=64,
epochs=1, device=device) # requires imgsz>=64 @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') def test_predict_multiple_devices(): + """Validate model prediction on multiple devices.""" model = YOLO('yolov8n.pt') model = model.cpu() assert str(model.device) == 'cpu' @@ -53,6 +57,7 @@ def test_predict_multiple_devices(): @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') def test_autobatch(): + """Check batch size for YOLO model using autobatch.""" from ultralytics.utils.autobatch import check_train_batch_size check_train_batch_size(YOLO(MODEL).model.cuda(), imgsz=128, amp=True) @@ -60,6 +65,7 @@ def test_autobatch(): @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') def test_utils_benchmarks(): + """Profile YOLO models for performance benchmarks.""" from ultralytics.utils.benchmarks import ProfileModels # Pre-export a dynamic engine model to use dynamic inference @@ -69,6 +75,7 @@ def test_utils_benchmarks(): @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') def test_predict_sam(): + """Test SAM model prediction with various prompts.""" from ultralytics import SAM from ultralytics.models.sam import Predictor as SAMPredictor @@ -102,6 +109,7 @@ def test_predict_sam(): @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') def test_model_ray_tune(): + """Tune YOLO model with Ray optimization library.""" with contextlib.suppress(RuntimeError): # RuntimeError may be caused by out-of-memory YOLO('yolov8n-cls.yaml').tune(use_ray=True, data='imagenet10', @@ -115,12 +123,14 @@ def test_model_ray_tune(): @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') def test_model_tune(): + """Tune YOLO model for performance.""" YOLO('yolov8n-pose.pt').tune(data='coco8-pose.yaml', plots=False, imgsz=32, epochs=1, iterations=2, device='cpu') YOLO('yolov8n-cls.pt').tune(data='imagenet10', plots=False, imgsz=32, epochs=1, iterations=2, device='cpu') @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') def test_pycocotools(): + """Validate model predictions using pycocotools.""" from ultralytics.models.yolo.detect import DetectionValidator from ultralytics.models.yolo.pose import PoseValidator from ultralytics.models.yolo.segment import SegmentationValidator diff --git a/tests/test_engine.py b/tests/test_engine.py index 6ea4d9f042..2d7895835d 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -14,10 +14,12 @@ MODEL = WEIGHTS_DIR / 'yolov8n' def test_func(*args): # noqa + """Test function callback.""" print('callback test passed') def test_export(): + """Test model exporting functionality.""" exporter = Exporter() exporter.add_callback('on_export_start', test_func) assert test_func in exporter.callbacks['on_export_start'], 'callback test failed' @@ -26,6 +28,7 @@ def test_export(): def test_detect(): + """Test object detection functionality.""" overrides = {'data': 'coco8.yaml', 'model': CFG_DET, 'imgsz': 32, 'epochs': 1, 'save': False} CFG.data = 'coco8.yaml' CFG.imgsz = 32 @@ -61,6 +64,7 @@ def test_detect(): def test_segment(): + """Test image segmentation functionality.""" overrides = {'data': 'coco8-seg.yaml', 'model': CFG_SEG, 'imgsz': 32, 'epochs': 1, 'save': False} CFG.data = 'coco8-seg.yaml' CFG.imgsz = 32 @@ -98,6 +102,7 @@ def test_segment(): def test_classify(): + """Test image classification functionality.""" overrides = {'data': 'imagenet10', 'model': CFG_CLS, 'imgsz': 32, 'epochs': 1, 'save': False} CFG.data = 'imagenet10' 
CFG.imgsz = 32 diff --git a/tests/test_python.py b/tests/test_python.py index 3e49f570a1..bea8afe1de 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -27,11 +27,13 @@ IS_TMP_WRITEABLE = is_dir_writeable(TMP) def test_model_forward(): + """Test the forward pass of the YOLO model.""" model = YOLO(CFG) model(source=None, imgsz=32, augment=True) # also test no source and augment def test_model_methods(): + """Test various methods and properties of the YOLO model.""" model = YOLO(MODEL) # Model methods @@ -51,7 +53,7 @@ def test_model_methods(): def test_model_profile(): - # Test profile=True model argument + """Test profiling of the YOLO model with 'profile=True' argument.""" from ultralytics.nn.tasks import DetectionModel model = DetectionModel() # build model @@ -61,7 +63,7 @@ def test_model_profile(): @pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable') def test_predict_txt(): - # Write a list of sources (file, dir, glob, recursive glob) to a txt file + """Test YOLO predictions with sources (file, dir, glob, recursive glob) specified in a text file.""" txt_file = TMP / 'sources.txt' with open(txt_file, 'w') as f: for x in [ASSETS / 'bus.jpg', ASSETS, ASSETS / '*', ASSETS / '**/*.jpg']: @@ -70,6 +72,7 @@ def test_predict_txt(): def test_predict_img(): + """Test YOLO prediction on various types of image sources.""" model = YOLO(MODEL) seg_model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt') cls_model = YOLO(WEIGHTS_DIR / 'yolov8n-cls.pt') @@ -105,7 +108,7 @@ def test_predict_img(): def test_predict_grey_and_4ch(): - # Convert SOURCE to greyscale and 4-ch + """Test YOLO prediction on SOURCE converted to greyscale and 4-channel images.""" im = Image.open(SOURCE) directory = TMP / 'im4' directory.mkdir(parents=True, exist_ok=True) @@ -132,8 +135,11 @@ def test_predict_grey_and_4ch(): @pytest.mark.skipif(not ONLINE, reason='environment is offline') @pytest.mark.skipif(not IS_TMP_WRITEABLE, reason='directory is not writeable') def test_track_stream(): - # Test YouTube streaming inference (short 10 frame video) with non-default ByteTrack tracker - # imgsz=160 required for tracking for higher confidence and better matches + """ + Test YouTube streaming tracking (short 10 frame video) with non-default ByteTrack tracker. 
+
+    Note: imgsz=160 is required for tracking, for higher confidence and better matches.
+    """
     import yaml
 
     model = YOLO(MODEL)
@@ -153,37 +159,44 @@ def test_track_stream():
 
 
 def test_val():
+    """Test the validation mode of the YOLO model."""
     YOLO(MODEL).val(data='coco8.yaml', imgsz=32, save_hybrid=True)
 
 
 def test_train_scratch():
+    """Test training the YOLO model from scratch."""
     model = YOLO(CFG)
     model.train(data='coco8.yaml', epochs=2, imgsz=32, cache='disk', batch=-1, close_mosaic=1, name='model')
     model(SOURCE)
 
 
 def test_train_pretrained():
+    """Test training the YOLO model from a pre-trained state."""
     model = YOLO(WEIGHTS_DIR / 'yolov8n-seg.pt')
     model.train(data='coco8-seg.yaml', epochs=1, imgsz=32, cache='ram', copy_paste=0.5, mixup=0.5, name=0)
     model(SOURCE)
 
 
 def test_export_torchscript():
+    """Test exporting the YOLO model to TorchScript format."""
     f = YOLO(MODEL).export(format='torchscript', optimize=False)
     YOLO(f)(SOURCE)  # exported model inference
 
 
 def test_export_onnx():
+    """Test exporting the YOLO model to ONNX format."""
     f = YOLO(MODEL).export(format='onnx', dynamic=True)
     YOLO(f)(SOURCE)  # exported model inference
 
 
 def test_export_openvino():
+    """Test exporting the YOLO model to OpenVINO format."""
     f = YOLO(MODEL).export(format='openvino')
     YOLO(f)(SOURCE)  # exported model inference
 
 
 def test_export_coreml():
+    """Test exporting the YOLO model to CoreML format."""
     if not WINDOWS:  # RuntimeError: BlobWriter not loaded with coremltools 7.0 on windows
         if MACOS:
             f = YOLO(MODEL).export(format='coreml')
@@ -193,7 +206,11 @@ def test_export_coreml():
 
 
 def test_export_tflite(enabled=False):
-    # TF suffers from install conflicts on Windows and macOS
+    """
+    Test exporting the YOLO model to TFLite format.
+
+    Note: TF suffers from install conflicts on Windows and macOS.
+    """
     if enabled and LINUX:
         model = YOLO(MODEL)
         f = model.export(format='tflite')
@@ -201,7 +218,11 @@ def test_export_tflite(enabled=False):
 
 
 def test_export_pb(enabled=False):
-    # TF suffers from install conflicts on Windows and macOS
+    """
+    Test exporting the YOLO model to *.pb format.
+
+    Note: TF suffers from install conflicts on Windows and macOS.
+    """
     if enabled and LINUX:
         model = YOLO(MODEL)
         f = model.export(format='pb')
@@ -209,18 +230,24 @@ def test_export_pb(enabled=False):
 
 
 def test_export_paddle(enabled=False):
-    # Paddle protobuf requirements conflicting with onnx protobuf requirements
+    """
+    Test exporting the YOLO model to Paddle format.
+
+    Note: Paddle protobuf requirements conflict with ONNX protobuf requirements.
+ """ if enabled: YOLO(MODEL).export(format='paddle') @pytest.mark.slow def test_export_ncnn(): + """Test exporting the YOLO model to NCNN format.""" f = YOLO(MODEL).export(format='ncnn') YOLO(f)(SOURCE) # exported model inference def test_all_model_yamls(): + """Test YOLO model creation for all available YAML configurations.""" for m in (ROOT / 'cfg' / 'models').rglob('*.yaml'): if 'rtdetr' in m.name: if TORCH_1_9: # torch<=1.8 issue - TypeError: __init__() got an unexpected keyword argument 'batch_first' @@ -230,6 +257,7 @@ def test_all_model_yamls(): def test_workflow(): + """Test the complete workflow including training, validation, prediction, and exporting.""" model = YOLO(MODEL) model.train(data='coco8.yaml', epochs=1, imgsz=32, optimizer='SGD') model.val(imgsz=32) @@ -238,12 +266,14 @@ def test_workflow(): def test_predict_callback_and_setup(): - # Test callback addition for prediction - def on_predict_batch_end(predictor): # results -> List[batch_size] + """Test callback functionality during YOLO prediction.""" + + def on_predict_batch_end(predictor): + """Callback function that handles operations at the end of a prediction batch.""" path, im0s, _, _ = predictor.batch im0s = im0s if isinstance(im0s, list) else [im0s] bs = [predictor.dataset.bs for _ in range(len(path))] - predictor.results = zip(predictor.results, im0s, bs) + predictor.results = zip(predictor.results, im0s, bs) # results is List[batch_size] model = YOLO(MODEL) model.add_callback('on_predict_batch_end', on_predict_batch_end) @@ -259,6 +289,7 @@ def test_predict_callback_and_setup(): def test_results(): + """Test various result formats for the YOLO model.""" for m in 'yolov8n-pose.pt', 'yolov8n-seg.pt', 'yolov8n.pt', 'yolov8n-cls.pt': results = YOLO(WEIGHTS_DIR / m)([SOURCE, SOURCE], imgsz=160) for r in results: @@ -274,7 +305,7 @@ def test_results(): @pytest.mark.skipif(not ONLINE, reason='environment is offline') def test_data_utils(): - # Test functions in ultralytics/data/utils.py + """Test utility functions in ultralytics/data/utils.py.""" from ultralytics.data.utils import HUBDatasetStats, autosplit from ultralytics.utils.downloads import zip_directory @@ -294,7 +325,7 @@ def test_data_utils(): @pytest.mark.skipif(not ONLINE, reason='environment is offline') def test_data_converter(): - # Test dataset converters + """Test dataset converters.""" from ultralytics.data.converter import coco80_to_coco91_class, convert_coco file = 'instances_val2017.json' @@ -304,6 +335,7 @@ def test_data_converter(): def test_data_annotator(): + """Test automatic data annotation.""" from ultralytics.data.annotator import auto_annotate auto_annotate(ASSETS, @@ -313,7 +345,7 @@ def test_data_annotator(): def test_events(): - # Test event sending + """Test event sending functionality.""" from ultralytics.hub.utils import Events events = Events() @@ -324,6 +356,7 @@ def test_events(): def test_cfg_init(): + """Test configuration initialization utilities.""" from ultralytics.cfg import check_dict_alignment, copy_default_cfg, smart_value with contextlib.suppress(SyntaxError): @@ -334,6 +367,7 @@ def test_cfg_init(): def test_utils_init(): + """Test initialization utilities.""" from ultralytics.utils import get_git_branch, get_git_origin_url, get_ubuntu_version, is_github_actions_ci get_ubuntu_version() @@ -343,6 +377,7 @@ def test_utils_init(): def test_utils_checks(): + """Test various utility checks.""" checks.check_yolov5u_filename('yolov5n.pt') checks.git_describe(ROOT) checks.check_requirements() # check requirements.txt @@ -354,12 
+389,14 @@ def test_utils_checks(): def test_utils_benchmarks(): + """Test model benchmarking.""" from ultralytics.utils.benchmarks import ProfileModels ProfileModels(['yolov8n.yaml'], imgsz=32, min_time=1, num_timed_runs=3, num_warmup_runs=1).profile() def test_utils_torchutils(): + """Test Torch utility functions.""" from ultralytics.nn.modules.conv import Conv from ultralytics.utils.torch_utils import get_flops_with_torch_profiler, profile, time_sync @@ -373,12 +410,14 @@ def test_utils_torchutils(): @pytest.mark.skipif(not ONLINE, reason='environment is offline') def test_utils_downloads(): + """Test file download utilities.""" from ultralytics.utils.downloads import get_google_drive_file_info get_google_drive_file_info('https://drive.google.com/file/d/1cqT-cJgANNrhIHCrEufUYhQ4RqiWG_lJ/view?usp=drive_link') def test_utils_ops(): + """Test various operations utilities.""" from ultralytics.utils.ops import (ltwh2xywh, ltwh2xyxy, make_divisible, xywh2ltwh, xywh2xyxy, xywhn2xyxy, xywhr2xyxyxyxy, xyxy2ltwh, xyxy2xywh, xyxy2xywhn, xyxyxyxy2xywhr) @@ -396,6 +435,7 @@ def test_utils_ops(): def test_utils_files(): + """Test file handling utilities.""" from ultralytics.utils.files import file_age, file_date, get_latest_run, spaces_in_path file_age(SOURCE) @@ -409,6 +449,7 @@ def test_utils_files(): def test_nn_modules_conv(): + """Test Convolutional Neural Network modules.""" from ultralytics.nn.modules.conv import CBAM, Conv2, ConvTranspose, DWConvTranspose2d, Focus c1, c2 = 8, 16 # input and output channels @@ -427,6 +468,7 @@ def test_nn_modules_conv(): def test_nn_modules_block(): + """Test Neural Network block modules.""" from ultralytics.nn.modules.block import C1, C3TR, BottleneckCSP, C3Ghost, C3x c1, c2 = 8, 16 # input and output channels @@ -442,6 +484,7 @@ def test_nn_modules_block(): @pytest.mark.skipif(not ONLINE, reason='environment is offline') def test_hub(): + """Test Ultralytics HUB functionalities.""" from ultralytics.hub import export_fmts_hub, logout from ultralytics.hub.utils import smart_request @@ -453,6 +496,7 @@ def test_hub(): @pytest.mark.slow @pytest.mark.skipif(not ONLINE, reason='environment is offline') def test_triton(): + """Test NVIDIA Triton Server functionalities.""" checks.check_requirements('tritonclient[all]') import subprocess import time diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index 98edf8048b..65cd75422a 100644 --- a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -180,8 +180,8 @@ def _handle_deprecation(custom): def check_dict_alignment(base: Dict, custom: Dict, e=None): """ - This function checks for any mismatched keys between a custom configuration list and a base configuration list. - If any mismatched keys are found, the function prints out similar keys from the base list and exits the program. + This function checks for any mismatched keys between a custom configuration list and a base configuration list. If + any mismatched keys are found, the function prints out similar keys from the base list and exits the program. Args: custom (dict): a dictionary of custom configuration options @@ -205,9 +205,8 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None): def merge_equals_args(args: List[str]) -> List[str]: """ - Merges arguments around isolated '=' args in a list of strings. - The function considers cases where the first argument ends with '=' or the second starts with '=', - as well as when the middle one is an equals sign. + Merges arguments around isolated '=' args in a list of strings. 
The function considers cases where the first + argument ends with '=' or the second starts with '=', as well as when the middle one is an equals sign. Args: args (List[str]): A list of strings where each element is an argument. diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py index 432023c726..7fb3247774 100644 --- a/ultralytics/data/augment.py +++ b/ultralytics/data/augment.py @@ -20,16 +20,30 @@ from .utils import polygons2masks, polygons2masks_overlap # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic class BaseTransform: + """ + Base class for image transformations. + + This is a generic transformation class that can be extended for specific image processing needs. + The class is designed to be compatible with both classification and semantic segmentation tasks. + + Methods: + __init__: Initializes the BaseTransform object. + apply_image: Applies image transformation to labels. + apply_instances: Applies transformations to object instances in labels. + apply_semantic: Applies semantic segmentation to an image. + __call__: Applies all label transformations to an image, instances, and semantic masks. + """ def __init__(self) -> None: + """Initializes the BaseTransform object.""" pass def apply_image(self, labels): - """Applies image transformation to labels.""" + """Applies image transformations to labels.""" pass def apply_instances(self, labels): - """Applies transformations to input 'labels' and returns object instances.""" + """Applies transformations to object instances in labels.""" pass def apply_semantic(self, labels): @@ -37,13 +51,14 @@ class BaseTransform: pass def __call__(self, labels): - """Applies label transformations to an image, instances and semantic masks.""" + """Applies all label transformations to an image, instances, and semantic masks.""" self.apply_image(labels) self.apply_instances(labels) self.apply_semantic(labels) class Compose: + """Class for composing multiple image transformations.""" def __init__(self, transforms): """Initializes the Compose object with a list of transforms.""" @@ -60,18 +75,23 @@ class Compose: self.transforms.append(transform) def tolist(self): - """Converts list of transforms to a standard Python list.""" + """Converts the list of transforms to a standard Python list.""" return self.transforms def __repr__(self): - """Return string representation of object.""" + """Returns a string representation of the object.""" return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})" class BaseMixTransform: - """This implementation is from mmyolo.""" + """ + Class for base mix (MixUp/Mosaic) transformations. + + This implementation is from mmyolo. 
+ """ def __init__(self, dataset, pre_transform=None, p=0.0) -> None: + """Initializes the BaseMixTransform object with dataset, pre_transform, and probability.""" self.dataset = dataset self.pre_transform = pre_transform self.p = p @@ -262,8 +282,10 @@ class Mosaic(BaseMixTransform): class MixUp(BaseMixTransform): + """Class for applying MixUp augmentation to the dataset.""" def __init__(self, dataset, pre_transform=None, p=0.0) -> None: + """Initializes MixUp object with dataset, pre_transform, and probability of applying MixUp.""" super().__init__(dataset=dataset, pre_transform=pre_transform, p=p) def get_indexes(self): @@ -271,7 +293,7 @@ class MixUp(BaseMixTransform): return random.randint(0, len(self.dataset) - 1) def _mix_transform(self, labels): - """Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf.""" + """Applies MixUp augmentation as per https://arxiv.org/pdf/1710.09412.pdf.""" r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 labels2 = labels['mix_labels'][0] labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8) @@ -281,6 +303,28 @@ class MixUp(BaseMixTransform): class RandomPerspective: + """ + Implements random perspective and affine transformations on images and corresponding bounding boxes, segments, and + keypoints. These transformations include rotation, translation, scaling, and shearing. The class also offers the + option to apply these transformations conditionally with a specified probability. + + Attributes: + degrees (float): Degree range for random rotations. + translate (float): Fraction of total width and height for random translation. + scale (float): Scaling factor interval, e.g., a scale factor of 0.1 allows a resize between 90%-110%. + shear (float): Shear intensity (angle in degrees). + perspective (float): Perspective distortion factor. + border (tuple): Tuple specifying mosaic border. + pre_transform (callable): A function/transform to apply to the image before starting the random transformation. + + Methods: + affine_transform(img, border): Applies a series of affine transformations to the image. + apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix. + apply_segments(segments, M): Transforms segments and generates new bounding boxes. + apply_keypoints(keypoints, M): Transforms keypoints. + __call__(labels): Main method to apply transformations to both images and their corresponding annotations. + box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation. + """ def __init__(self, degrees=0.0, @@ -290,17 +334,31 @@ class RandomPerspective: perspective=0.0, border=(0, 0), pre_transform=None): + """Initializes RandomPerspective object with transformation parameters.""" + self.degrees = degrees self.translate = translate self.scale = scale self.shear = shear self.perspective = perspective - # Mosaic border - self.border = border + self.border = border # mosaic border self.pre_transform = pre_transform def affine_transform(self, img, border): - """Center.""" + """ + Applies a sequence of affine transformations centered around the image center. + + Args: + img (ndarray): Input image. + border (tuple): Border dimensions. + + Returns: + img (ndarray): Transformed image. + M (ndarray): Transformation matrix. + s (float): Scale factor. 
+ """ + + # Center C = np.eye(3, dtype=np.float32) C[0, 2] = -img.shape[1] / 2 # x translation (pixels) @@ -462,8 +520,22 @@ class RandomPerspective: labels['resized_shape'] = img.shape[:2] return labels - def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) - # Compute box candidates: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio + def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): + """ + Compute box candidates based on a set of thresholds. This method compares the characteristics of the boxes + before and after augmentation to decide whether a box is a candidate for further processing. + + Args: + box1 (numpy.ndarray): The 4,n bounding box before augmentation, represented as [x1, y1, x2, y2]. + box2 (numpy.ndarray): The 4,n bounding box after augmentation, represented as [x1, y1, x2, y2]. + wh_thr (float, optional): The width and height threshold in pixels. Default is 2. + ar_thr (float, optional): The aspect ratio threshold. Default is 100. + area_thr (float, optional): The area ratio threshold. Default is 0.1. + eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16. + + Returns: + (numpy.ndarray): A boolean array indicating which boxes are candidates based on the given thresholds. + """ w1, h1 = box1[2] - box1[0], box1[3] - box1[1] w2, h2 = box2[2] - box2[0], box2[3] - box2[1] ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio @@ -471,14 +543,32 @@ class RandomPerspective: class RandomHSV: + """ + This class is responsible for performing random adjustments to the Hue, Saturation, and Value (HSV) channels of an + image. + + The adjustments are random but within limits set by hgain, sgain, and vgain. + """ def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None: + """ + Initialize RandomHSV class with gains for each HSV channel. + + Args: + hgain (float, optional): Maximum variation for hue. Default is 0.5. + sgain (float, optional): Maximum variation for saturation. Default is 0.5. + vgain (float, optional): Maximum variation for value. Default is 0.5. + """ self.hgain = hgain self.sgain = sgain self.vgain = vgain def __call__(self, labels): - """Applies image HSV augmentation""" + """ + Applies random HSV augmentation to an image within the predefined limits. + + The modified image replaces the original image in the input 'labels' dict. + """ img = labels['img'] if self.hgain or self.sgain or self.vgain: r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains @@ -496,9 +586,22 @@ class RandomHSV: class RandomFlip: - """Applies random horizontal or vertical flip to an image with a given probability.""" + """ + Applies a random horizontal or vertical flip to an image with a given probability. + + Also updates any instances (bounding boxes, keypoints, etc.) accordingly. + """ def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None: + """ + Initializes the RandomFlip class with probability and direction. + + Args: + p (float, optional): The probability of applying the flip. Must be between 0 and 1. Default is 0.5. + direction (str, optional): The direction to apply the flip. Must be 'horizontal' or 'vertical'. + Default is 'horizontal'. + flip_idx (array-like, optional): Index mapping for flipping keypoints, if any. 
+ """ assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}' assert 0 <= p <= 1.0 @@ -507,7 +610,16 @@ class RandomFlip: self.flip_idx = flip_idx def __call__(self, labels): - """Resize image and padding for detection, instance segmentation, pose.""" + """ + Applies random flip to an image and updates any instances like bounding boxes or keypoints accordingly. + + Args: + labels (dict): A dictionary containing the keys 'img' and 'instances'. 'img' is the image to be flipped. + 'instances' is an object containing bounding boxes and optionally keypoints. + + Returns: + (dict): The same dict with the flipped image and updated instances under the 'img' and 'instances' keys. + """ img = labels['img'] instances = labels.pop('instances') instances.convert_bbox(format='xywh') @@ -599,12 +711,38 @@ class LetterBox: class CopyPaste: + """ + Implements the Copy-Paste augmentation as described in the paper https://arxiv.org/abs/2012.07177. This class is + responsible for applying the Copy-Paste augmentation on images and their corresponding instances. + """ def __init__(self, p=0.5) -> None: + """ + Initializes the CopyPaste class with a given probability. + + Args: + p (float, optional): The probability of applying the Copy-Paste augmentation. Must be between 0 and 1. + Default is 0.5. + """ self.p = p def __call__(self, labels): - """Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy).""" + """ + Applies the Copy-Paste augmentation to the given image and instances. + + Args: + labels (dict): A dictionary containing: + - 'img': The image to augment. + - 'cls': Class labels associated with the instances. + - 'instances': Object containing bounding boxes, and optionally, keypoints and segments. + + Returns: + (dict): Dict with augmented image and updated instances under the 'img', 'cls', and 'instances' keys. + + Notes: + 1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work. + 2. This method modifies the input dictionary 'labels' in place. + """ im = labels['img'] cls = labels['cls'] h, w = im.shape[:2] @@ -639,9 +777,13 @@ class CopyPaste: class Albumentations: - """Albumentations transformations. Optional, uninstall package to disable. - Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive Histogram Equalization, - random change of brightness and contrast, RandomGamma and lowering of image quality by compression.""" + """ + Albumentations transformations. + + Optional, uninstall package to disable. Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive + Histogram Equalization, random change of brightness and contrast, RandomGamma and lowering of image quality by + compression. + """ def __init__(self, p=1.0): """Initialize the transform object for YOLO bbox formatted params.""" @@ -690,6 +832,19 @@ class Albumentations: # TODO: technically this is not an augmentation, maybe we should put this to another files class Format: + """ + Formats image annotations for object detection, instance segmentation, and pose estimation tasks. The class + standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader. + + Attributes: + bbox_format (str): Format for bounding boxes. Default is 'xywh'. + normalize (bool): Whether to normalize bounding boxes. Default is True. + return_mask (bool): Return instance masks for segmentation. Default is False. 
+ return_keypoint (bool): Return keypoints for pose estimation. Default is False. + mask_ratio (int): Downsample ratio for masks. Default is 4. + mask_overlap (bool): Whether to overlap masks. Default is True. + batch_idx (bool): Keep batch indexes. Default is True. + """ def __init__(self, bbox_format='xywh', @@ -699,6 +854,7 @@ class Format: mask_ratio=4, mask_overlap=True, batch_idx=True): + """Initializes the Format class with given parameters.""" self.bbox_format = bbox_format self.normalize = normalize self.return_mask = return_mask # set False when training detection only @@ -746,7 +902,7 @@ class Format: return img def _format_segments(self, instances, cls, w, h): - """convert polygon points to bitmap.""" + """Convert polygon points to bitmap.""" segments = instances.segments if self.mask_overlap: masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio) @@ -851,35 +1007,75 @@ def classify_albumentations( class ClassifyLetterBox: - """YOLOv8 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])""" + """ + YOLOv8 LetterBox class for image preprocessing, designed to be part of a transformation pipeline, e.g., + T.Compose([LetterBox(size), ToTensor()]). + + Attributes: + h (int): Target height of the image. + w (int): Target width of the image. + auto (bool): If True, automatically solves for short side using stride. + stride (int): The stride value, used when 'auto' is True. + """ def __init__(self, size=(640, 640), auto=False, stride=32): - """Resizes image and crops it to center with max dimensions 'h' and 'w'.""" + """ + Initializes the ClassifyLetterBox class with a target size, auto-flag, and stride. + + Args: + size (Union[int, Tuple[int, int]]): The target dimensions (height, width) for the letterbox. + auto (bool): If True, automatically calculates the short side based on stride. + stride (int): The stride value, used when 'auto' is True. + """ super().__init__() self.h, self.w = (size, size) if isinstance(size, int) else size self.auto = auto # pass max size integer, automatically solve for short side using stride self.stride = stride # used with auto - def __call__(self, im): # im = np.array HWC + def __call__(self, im): + """ + Resizes the image and pads it with a letterbox method. + + Args: + im (numpy.ndarray): The input image as a numpy array of shape HWC. + + Returns: + (numpy.ndarray): The letterboxed and resized image as a numpy array. + """ imh, imw = im.shape[:2] - r = min(self.h / imh, self.w / imw) # ratio of new/old - h, w = round(imh * r), round(imw * r) # resized image + r = min(self.h / imh, self.w / imw) # ratio of new/old dimensions + h, w = round(imh * r), round(imw * r) # resized image dimensions + + # Calculate padding dimensions hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w) top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1) + + # Create padded image im_out = np.full((hs, ws, 3), 114, dtype=im.dtype) im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) return im_out class CenterCrop: - """YOLOv8 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])""" + """YOLOv8 CenterCrop class for image preprocessing, designed to be part of a transformation pipeline, e.g., + T.Compose([CenterCrop(size), ToTensor()]). 
+    """
 
     def __init__(self, size=640):
-        """Converts an image from numpy array to PyTorch tensor."""
+        """Initializes the CenterCrop object with a target size."""
         super().__init__()
         self.h, self.w = (size, size) if isinstance(size, int) else size
 
-    def __call__(self, im):  # im = np.array HWC
+    def __call__(self, im):
+        """
+        Resizes and crops the center of the image using a letterbox method.
+
+        Args:
+            im (numpy.ndarray): The input image as a numpy array of shape HWC.
+
+        Returns:
+            (numpy.ndarray): The center-cropped and resized image as a numpy array.
+        """
         imh, imw = im.shape[:2]
         m = min(imh, imw)  # min dimension
         top, left = (imh - m) // 2, (imw - m) // 2
@@ -887,14 +1083,23 @@ class CenterCrop:
 
 
 class ToTensor:
-    """YOLOv8 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])."""
+    """YOLOv8 ToTensor class for image preprocessing, i.e., T.Compose([LetterBox(size), ToTensor()])."""
 
     def __init__(self, half=False):
         """Initialize YOLOv8 ToTensor object with optional half-precision support."""
         super().__init__()
         self.half = half
 
-    def __call__(self, im):  # im = np.array HWC in BGR order
+    def __call__(self, im):
+        """
+        Transforms an image from a numpy array to a PyTorch tensor, applying optional half-precision and normalization.
+
+        Args:
+            im (numpy.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order.
+
+        Returns:
+            (torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized to [0, 1].
+        """
         im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1])  # HWC to CHW -> BGR to RGB -> contiguous
         im = torch.from_numpy(im)  # to torch
         im = im.half() if self.half else im.float()  # uint8 to fp16/32
diff --git a/ultralytics/data/base.py b/ultralytics/data/base.py
index 429533dc30..462280a6e3 100644
--- a/ultralytics/data/base.py
+++ b/ultralytics/data/base.py
@@ -62,6 +62,7 @@ class BaseDataset(Dataset):
                  classes=None,
                  fraction=1.0):
+        """Initialize BaseDataset with given configuration and options."""
         super().__init__()
         self.img_path = img_path
         self.imgsz = imgsz
         self.augment = augment
@@ -256,7 +257,7 @@ class BaseDataset(Dataset):
         return len(self.labels)
 
     def update_labels_info(self, label):
-        """custom your label format here."""
+        """Customize your label format here."""
         return label
 
     def build_transforms(self, hyp=None):
diff --git a/ultralytics/data/build.py b/ultralytics/data/build.py
index 33b5edadd4..07de91c831 100644
--- a/ultralytics/data/build.py
+++ b/ultralytics/data/build.py
@@ -20,7 +20,11 @@ from .utils import PIN_MEMORY
 
 
 class InfiniteDataLoader(dataloader.DataLoader):
-    """Dataloader that reuses workers. Uses same syntax as vanilla DataLoader."""
+    """
+    Dataloader that reuses workers.
+
+    Uses same syntax as vanilla DataLoader.
+    """
 
     def __init__(self, *args, **kwargs):
         """Dataloader that infinitely recycles workers, inherits from DataLoader."""
@@ -38,7 +42,9 @@ class InfiniteDataLoader(dataloader.DataLoader):
             yield next(self.iterator)
 
     def reset(self):
-        """Reset iterator.
-        This is useful when we want to modify settings of dataset while training.
+        """
+        Reset iterator.
+
+        This is useful when we want to modify settings of the dataset while training.
         """
         self.iterator = self._get_iterator()
@@ -70,7 +76,7 @@ def seed_worker(worker_id):  # noqa
 
 
 def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32):
-    """Build YOLO Dataset"""
+    """Build YOLO Dataset."""
     return YOLODataset(
         img_path=img_path,
         imgsz=cfg.imgsz,
diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py
index fecc30ce99..4451df170d 100644
--- a/ultralytics/data/converter.py
+++ b/ultralytics/data/converter.py
@@ -12,7 +12,8 @@ from ultralytics.utils import TQDM
 
 
 def coco91_to_coco80_class():
-    """Converts 91-index COCO class IDs to 80-index COCO class IDs.
+    """
+    Converts 91-index COCO class IDs to 80-index COCO class IDs.
 
     Returns:
         (list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the
@@ -51,7 +52,8 @@ def convert_coco(labels_dir='../coco/annotations/',
                  use_segments=False,
                  use_keypoints=False,
                  cls91to80=True):
-    """Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
+    """
+    Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
 
     Args:
         labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
@@ -203,6 +205,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
         'helipad': 17}
 
     def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
+        """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
         orig_label_path = orig_label_dir / f'{image_name}.txt'
         save_path = save_dir / f'{image_name}.txt'
diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py
index 217d3babc3..f0f431eab2 100644
--- a/ultralytics/data/dataset.py
+++ b/ultralytics/data/dataset.py
@@ -33,6 +33,7 @@ class YOLODataset(BaseDataset):
     """
 
     def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
+        """Initializes the YOLODataset with optional configurations for segments and keypoints."""
         self.use_segments = use_segments
         self.use_keypoints = use_keypoints
         self.data = data
@@ -40,7 +41,9 @@ class YOLODataset(BaseDataset):
         super().__init__(*args, **kwargs)
 
     def cache_labels(self, path=Path('./labels.cache')):
-        """Cache dataset labels, check images and read shapes.
+        """
+        Cache dataset labels, check images and read shapes.
+
         Args:
             path (Path): path where to save the cache file (default: Path('./labels.cache')).
         Returns:
@@ -157,7 +160,7 @@ class YOLODataset(BaseDataset):
         self.transforms = self.build_transforms(hyp)
 
     def update_labels_info(self, label):
-        """custom your label format here."""
+        """Customize your label format here."""
         # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
         # we can make it also support classification and semantic segmentation by adding or removing some dict keys there.
         bboxes = label.pop('bboxes')
@@ -254,6 +257,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
         return {'img': sample, 'cls': j}
 
     def __len__(self) -> int:
+        """Return the total number of samples in the dataset."""
        return len(self.samples)
 
     def verify_images(self):
@@ -320,6 +324,16 @@ def save_dataset_cache_file(prefix, path, x):
 
 # TODO: support semantic segmentation
 class SemanticDataset(BaseDataset):
+    """
+    Semantic Segmentation Dataset.
+
+    This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
+    from the BaseDataset class.
+ + Note: + This class is currently a placeholder and needs to be populated with methods and attributes for supporting + semantic segmentation tasks. + """ def __init__(self): """Initialize a SemanticDataset object.""" diff --git a/ultralytics/data/loaders.py b/ultralytics/data/loaders.py index 88491c7942..0541f712a6 100644 --- a/ultralytics/data/loaders.py +++ b/ultralytics/data/loaders.py @@ -22,6 +22,7 @@ from ultralytics.utils.checks import check_requirements @dataclass class SourceTypes: + """Class to represent various types of input sources for predictions.""" webcam: bool = False screenshot: bool = False from_img: bool = False @@ -29,7 +30,34 @@ class SourceTypes: class LoadStreams: - """Stream Loader, i.e. `yolo predict source='rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP, TCP streams`.""" + """ + Stream Loader for various types of video streams. + + Suitable for use with `yolo predict source='rtsp://example.com/media.mp4'`, supports RTSP, RTMP, HTTP, and TCP streams. + + Attributes: + sources (str): The source input paths or URLs for the video streams. + imgsz (int): The image size for processing, defaults to 640. + vid_stride (int): Video frame-rate stride, defaults to 1. + buffer (bool): Whether to buffer input streams, defaults to False. + running (bool): Flag to indicate if the streaming thread is running. + mode (str): Set to 'stream' indicating real-time capture. + imgs (list): List of image frames for each stream. + fps (list): List of FPS for each stream. + frames (list): List of total frames for each stream. + threads (list): List of threads for each stream. + shape (list): List of shapes for each stream. + caps (list): List of cv2.VideoCapture objects for each stream. + bs (int): Batch size for processing. + + Methods: + __init__: Initialize the stream loader. + update: Read stream frames in daemon thread. + close: Close stream loader and release resources. + __iter__: Returns an iterator object for the class. + __next__: Returns source paths, transformed, and original images for processing. + __len__: Return the length of the sources object. + """ def __init__(self, sources='file.streams', imgsz=640, vid_stride=1, buffer=False): """Initialize instance variables and check for consistent input stream shapes.""" @@ -149,10 +177,33 @@ class LoadStreams: class LoadScreenshots: - """YOLOv8 screenshot dataloader, i.e. `yolo predict source=screen`.""" + """ + YOLOv8 screenshot dataloader. + + This class manages the loading of screenshot images for processing with YOLOv8. + Suitable for use with `yolo predict source=screen`. + + Attributes: + source (str): The source input indicating which screen to capture. + imgsz (int): The image size for processing, defaults to 640. + screen (int): The screen number to capture. + left (int): The left coordinate for screen capture area. + top (int): The top coordinate for screen capture area. + width (int): The width of the screen capture area. + height (int): The height of the screen capture area. + mode (str): Set to 'stream' indicating real-time capture. + frame (int): Counter for captured frames. + sct (mss.mss): Screen capture object from `mss` library. + bs (int): Batch size, set to 1. + monitor (dict): Monitor configuration details. + + Methods: + __iter__: Returns an iterator object. + __next__: Captures the next screenshot and returns it. 
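In practice LoadStreams is reached through the predict API rather than instantiated directly; a sketch using the RTSP URL from the docstring above (the URL is the docstring's own example, not a live stream):

```python
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
# RTSP/RTMP/HTTP/TCP sources are routed through LoadStreams; vid_stride skips frames
for result in model.predict(source='rtsp://example.com/media.mp4', stream=True, vid_stride=1):
    print(result.boxes)
```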
+ """ def __init__(self, source, imgsz=640): - """source = [screen_number left top width height] (pixels).""" + """Source = [screen_number left top width height] (pixels).""" check_requirements('mss') import mss # noqa @@ -192,7 +243,28 @@ class LoadScreenshots: class LoadImages: - """YOLOv8 image/video dataloader, i.e. `yolo predict source=image.jpg/vid.mp4`.""" + """ + YOLOv8 image/video dataloader. + + This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from + various formats, including single image files, video files, and lists of image and video paths. + + Attributes: + imgsz (int): Image size, defaults to 640. + files (list): List of image and video file paths. + nf (int): Total number of files (images and videos). + video_flag (list): Flags indicating whether a file is a video (True) or an image (False). + mode (str): Current mode, 'image' or 'video'. + vid_stride (int): Stride for video frame-rate, defaults to 1. + bs (int): Batch size, set to 1 for this class. + cap (cv2.VideoCapture): Video capture object for OpenCV. + frame (int): Frame counter for video. + frames (int): Total number of frames in the video. + count (int): Counter for iteration, initialized at 0 during `__iter__()`. + + Methods: + _new_video(path): Create a new cv2.VideoCapture object for a given video path. + """ def __init__(self, path, imgsz=640, vid_stride=1): """Initialize the Dataloader and raise FileNotFoundError if file not found.""" @@ -285,6 +357,24 @@ class LoadImages: class LoadPilAndNumpy: + """ + Load images from PIL and Numpy arrays for batch processing. + + This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats. + It performs basic validation and format conversion to ensure that the images are in the required format for + downstream processing. + + Attributes: + paths (list): List of image paths or autogenerated filenames. + im0 (list): List of images stored as Numpy arrays. + imgsz (int): Image size, defaults to 640. + mode (str): Type of data being processed, defaults to 'image'. + bs (int): Batch size, equivalent to the length of `im0`. + count (int): Counter for iteration, initialized at 0 during `__iter__()`. + + Methods: + _single_check(im): Validate and format a single image to a Numpy array. + """ def __init__(self, im0, imgsz=640): """Initialize PIL and Numpy Dataloader.""" @@ -326,8 +416,24 @@ class LoadPilAndNumpy: class LoadTensor: + """ + Load images from torch.Tensor data. + + This class manages the loading and pre-processing of image data from PyTorch tensors for further processing. + + Attributes: + im0 (torch.Tensor): The input tensor containing the image(s). + bs (int): Batch size, inferred from the shape of `im0`. + mode (str): Current mode, set to 'image'. + paths (list): List of image paths or filenames. + count (int): Counter for iteration, initialized at 0 during `__iter__()`. + + Methods: + _single_check(im, stride): Validate and possibly modify the input tensor. 
+ """ def __init__(self, im0) -> None: + """Initialize Tensor Dataloader.""" self.im0 = self._single_check(im0) self.bs = self.im0.shape[0] self.mode = 'image' @@ -370,9 +476,7 @@ class LoadTensor: def autocast_list(source): - """ - Merges a list of source of different types into a list of numpy arrays or PIL images - """ + """Merges a list of source of different types into a list of numpy arrays or PIL images.""" files = [] for im in source: if isinstance(im, (str, Path)): # filename or uri diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py index 00ddc6e571..c5c2d99452 100644 --- a/ultralytics/data/utils.py +++ b/ultralytics/data/utils.py @@ -547,9 +547,9 @@ class HUBDatasetStats: def compress_one_image(f, f_new=None, max_dim=1920, quality=50): """ - Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the - Python Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will - not be resized. + Compresses a single image file to reduced size while preserving its aspect ratio and quality using either the Python + Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will not be + resized. Args: f (str): The path to the input image file. diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index b209b9d71f..66932a658f 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -986,9 +986,7 @@ class Exporter: return model def add_callback(self, event: str, callback): - """ - Appends the given callback. - """ + """Appends the given callback.""" self.callbacks[event].append(callback) def run_callbacks(self, event: str): diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index 69de12b2a8..806b0346bb 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -159,9 +159,7 @@ class Model(nn.Module): self.overrides['task'] = self.task def _check_is_pytorch_model(self): - """ - Raises TypeError is model is not a PyTorch model - """ + """Raises TypeError is model is not a PyTorch model.""" pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == '.pt' pt_module = isinstance(self.model, nn.Module) if not (pt_module or pt_str): @@ -173,9 +171,7 @@ class Model(nn.Module): f"argument directly in your inference command, i.e. 'model.predict(source=..., device=0)'") def reset_weights(self): - """ - Resets the model modules parameters to randomly initialized values, losing all training information. - """ + """Resets the model modules parameters to randomly initialized values, losing all training information.""" self._check_is_pytorch_model() for m in self.model.modules(): if hasattr(m, 'reset_parameters'): @@ -185,9 +181,7 @@ class Model(nn.Module): return self def load(self, weights='yolov8n.pt'): - """ - Transfers parameters with matching names and shapes from 'weights' to model. - """ + """Transfers parameters with matching names and shapes from 'weights' to model.""" self._check_is_pytorch_model() if isinstance(weights, (str, Path)): weights, self.ckpt = attempt_load_one_weight(weights) diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py index 3eb851b3ad..367efc694d 100644 --- a/ultralytics/engine/predictor.py +++ b/ultralytics/engine/predictor.py @@ -58,7 +58,7 @@ Example: class BasePredictor: """ - BasePredictor + BasePredictor. A base class for creating predictors. 
@@ -109,7 +109,8 @@ class BasePredictor:
        callbacks.add_integration_callbacks(self)

    def preprocess(self, im):
-        """Prepares input image before inference.
+        """
+        Prepares input image before inference.

        Args:
            im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.
@@ -128,6 +129,7 @@ class BasePredictor:
        return im

    def inference(self, im, *args, **kwargs):
+        """Runs inference on a given image using the specified model and arguments."""
        visualize = increment_path(self.save_dir / Path(self.batch[0][0]).stem, mkdir=True) if self.args.visualize and (not self.source_type.tensor) else False
        return self.model(im, augment=self.args.augment, visualize=visualize)
@@ -194,7 +196,11 @@ class BasePredictor:
        return list(self.stream_inference(source, model, *args, **kwargs))  # merge list of Result into one

    def predict_cli(self, source=None, model=None):
-        """Method used for CLI prediction. It uses always generator as outputs as not required by CLI mode."""
+        """
+        Method used for CLI prediction.
+
+        It always uses generators for outputs, since accumulated results are not required in CLI mode.
+        """
        gen = self.stream_inference(source, model)
        for _ in gen:  # running CLI inference without accumulating any outputs (do not modify)
            pass
@@ -352,7 +358,5 @@ class BasePredictor:
            callback(self)

    def add_callback(self, event: str, func):
-        """
-        Add callback
-        """
+        """Add callback."""
        self.callbacks[event].append(func)
diff --git a/ultralytics/engine/results.py b/ultralytics/engine/results.py
index 0fc6a0dd3e..fcbd762c1a 100644
--- a/ultralytics/engine/results.py
+++ b/ultralytics/engine/results.py
@@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
-Ultralytics Results, Boxes and Masks classes for handling inference results
+Ultralytics Results, Boxes and Masks classes for handling inference results.

Usage: See https://docs.ultralytics.com/modes/predict/
"""
@@ -19,12 +19,11 @@ from ultralytics.utils.torch_utils import smart_inference_mode

class BaseTensor(SimpleClass):
-    """
-    Base tensor class with additional methods for easy manipulation and device handling.
-    """
+    """Base tensor class with additional methods for easy manipulation and device handling."""

    def __init__(self, data, orig_shape) -> None:
-        """Initialize BaseTensor with data and original shape.
+        """
+        Initialize BaseTensor with data and original shape.

        Args:
            data (torch.Tensor | np.ndarray): Predictions, such as bboxes, masks and keypoints.
@@ -126,6 +125,18 @@ class Results(SimpleClass):
        self.probs = probs

    def _apply(self, fn, *args, **kwargs):
+        """
+        Applies a function to all non-empty attributes and returns a new Results object with modified attributes. This
+        function is internally called by methods like .to(), .cuda(), .cpu(), etc.
+
+        Args:
+            fn (str): The name of the function to apply.
+            *args: Variable length argument list to pass to the function.
+            **kwargs: Arbitrary keyword arguments to pass to the function.
+
+        Returns:
+            Results: A new Results object with attributes modified by the applied function.
+        """
        r = self.new()
        for k in self._keys:
            v = getattr(self, k)
@@ -250,9 +261,7 @@ class Results(SimpleClass):
        return annotator.result()

    def verbose(self):
-        """
-        Return log string for each task.
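What `Results._apply()` enables in user code, sketched below: device and dtype moves return new Results objects with every non-empty attribute converted (weights and image path are placeholders):

```python
from ultralytics import YOLO

results = YOLO('yolov8n.pt').predict('bus.jpg')
r = results[0].cpu()   # dispatched through Results._apply('cpu')
r = r.to('cpu')        # Results._apply('to', 'cpu')
print(r.verbose())     # log string per task, e.g. '4 persons, 1 bus, '
```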
- """ + """Return log string for each task.""" log_string = '' probs = self.probs boxes = self.boxes @@ -537,6 +546,7 @@ class Probs(BaseTensor): """ def __init__(self, probs, orig_shape=None) -> None: + """Initialize the Probs class with classification probabilities and optional original shape of the image.""" super().__init__(probs, orig_shape) @property diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py index 38df267394..6905847a45 100644 --- a/ultralytics/engine/trainer.py +++ b/ultralytics/engine/trainer.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license """ -Train a model on a dataset +Train a model on a dataset. Usage: $ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16 @@ -37,7 +37,7 @@ from ultralytics.utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, class BaseTrainer: """ - BaseTrainer + BaseTrainer. A base class for creating trainers. @@ -143,15 +143,11 @@ class BaseTrainer: callbacks.add_integration_callbacks(self) def add_callback(self, event: str, callback): - """ - Appends the given callback. - """ + """Appends the given callback.""" self.callbacks[event].append(callback) def set_callback(self, event: str, callback): - """ - Overrides the existing callbacks with the given callback. - """ + """Overrides the existing callbacks with the given callback.""" self.callbacks[event] = [callback] def run_callbacks(self, event: str): @@ -207,9 +203,7 @@ class BaseTrainer: world_size=world_size) def _setup_train(self, world_size): - """ - Builds dataloaders and optimizer on correct rank process. - """ + """Builds dataloaders and optimizer on correct rank process.""" # Model self.run_callbacks('on_pretrain_routine_start') @@ -450,14 +444,14 @@ class BaseTrainer: @staticmethod def get_dataset(data): """ - Get train, val path from data dict if it exists. Returns None if data format is not recognized. + Get train, val path from data dict if it exists. + + Returns None if data format is not recognized. """ return data['train'], data.get('val') or data.get('test') def setup_model(self): - """ - load/create/download model for any task. - """ + """Load/create/download model for any task.""" if isinstance(self.model, torch.nn.Module): # if model is loaded beforehand. No setup needed return @@ -482,14 +476,14 @@ class BaseTrainer: self.ema.update(self.model) def preprocess_batch(self, batch): - """ - Allows custom preprocessing model inputs and ground truths depending on task type. - """ + """Allows custom preprocessing model inputs and ground truths depending on task type.""" return batch def validate(self): """ - Runs validation on test set using self.validator. The returned dict is expected to contain "fitness" key. + Runs validation on test set using self.validator. + + The returned dict is expected to contain "fitness" key. """ metrics = self.validator(self) fitness = metrics.pop('fitness', -self.loss.detach().cpu().numpy()) # use loss as fitness measure if not found @@ -506,26 +500,20 @@ class BaseTrainer: raise NotImplementedError('get_validator function not implemented in trainer') def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'): - """ - Returns dataloader derived from torch.data.Dataloader. 
- """ + """Returns dataloader derived from torch.data.Dataloader.""" raise NotImplementedError('get_dataloader function not implemented in trainer') def build_dataset(self, img_path, mode='train', batch=None): - """Build dataset""" + """Build dataset.""" raise NotImplementedError('build_dataset function not implemented in trainer') def label_loss_items(self, loss_items=None, prefix='train'): - """ - Returns a loss dict with labelled training loss items tensor - """ + """Returns a loss dict with labelled training loss items tensor.""" # Not needed for classification but necessary for segmentation & detection return {'loss': loss_items} if loss_items is not None else ['loss'] def set_model_attributes(self): - """ - To set or update model parameters before training. - """ + """To set or update model parameters before training.""" self.model.names = self.data['names'] def build_targets(self, preds, targets): @@ -632,8 +620,8 @@ class BaseTrainer: def build_optimizer(self, model, name='auto', lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5): """ - Constructs an optimizer for the given model, based on the specified optimizer name, learning rate, - momentum, weight decay, and number of iterations. + Constructs an optimizer for the given model, based on the specified optimizer name, learning rate, momentum, + weight decay, and number of iterations. Args: model (torch.nn.Module): The model for which to build an optimizer. diff --git a/ultralytics/engine/tuner.py b/ultralytics/engine/tuner.py index d60a56c22f..6d6e0a8686 100644 --- a/ultralytics/engine/tuner.py +++ b/ultralytics/engine/tuner.py @@ -31,32 +31,32 @@ from ultralytics.utils.plotting import plot_tune_results class Tuner: """ - Class responsible for hyperparameter tuning of YOLO models. + Class responsible for hyperparameter tuning of YOLO models. - The class evolves YOLO model hyperparameters over a given number of iterations - by mutating them according to the search space and retraining the model to evaluate their performance. + The class evolves YOLO model hyperparameters over a given number of iterations + by mutating them according to the search space and retraining the model to evaluate their performance. - Attributes: - space (dict): Hyperparameter search space containing bounds and scaling factors for mutation. - tune_dir (Path): Directory where evolution logs and results will be saved. - tune_csv (Path): Path to the CSV file where evolution logs are saved. + Attributes: + space (dict): Hyperparameter search space containing bounds and scaling factors for mutation. + tune_dir (Path): Directory where evolution logs and results will be saved. + tune_csv (Path): Path to the CSV file where evolution logs are saved. - Methods: - _mutate(hyp: dict) -> dict: - Mutates the given hyperparameters within the bounds specified in `self.space`. + Methods: + _mutate(hyp: dict) -> dict: + Mutates the given hyperparameters within the bounds specified in `self.space`. - __call__(): - Executes the hyperparameter evolution across multiple iterations. + __call__(): + Executes the hyperparameter evolution across multiple iterations. - Example: - Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations. - ```python - from ultralytics import YOLO + Example: + Tune hyperparameters for YOLOv8n on COCO8 at imgsz=640 and epochs=30 for 300 tuning iterations. 
+ ```python + from ultralytics import YOLO - model = YOLO('yolov8n.pt') - model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False) - ``` - """ + model = YOLO('yolov8n.pt') + model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False) + ``` + """ def __init__(self, args=DEFAULT_CFG, _callbacks=None): """ diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py index 8d8349bda5..5ad695468c 100644 --- a/ultralytics/engine/validator.py +++ b/ultralytics/engine/validator.py @@ -36,7 +36,7 @@ from ultralytics.utils.torch_utils import de_parallel, select_device, smart_infe class BaseValidator: """ - BaseValidator + BaseValidator. A base class for creating validators. @@ -102,8 +102,7 @@ class BaseValidator: @smart_inference_mode() def __call__(self, trainer=None, model=None): - """ - Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer + """Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer gets priority). """ self.training = trainer is not None @@ -260,7 +259,7 @@ class BaseValidator: raise NotImplementedError('get_dataloader function not implemented for this validator') def build_dataset(self, img_path): - """Build dataset""" + """Build dataset.""" raise NotImplementedError('build_dataset function not implemented in validator') def preprocess(self, batch): diff --git a/ultralytics/hub/__init__.py b/ultralytics/hub/__init__.py index daed439c22..8e101d6b83 100644 --- a/ultralytics/hub/__init__.py +++ b/ultralytics/hub/__init__.py @@ -80,8 +80,8 @@ def get_export(model_id='', format='torchscript'): def check_dataset(path='', task='detect'): """ - Function for error-checking HUB dataset Zip file before upload. It checks a dataset for errors before it is - uploaded to the HUB. Usage examples are given below. + Function for error-checking HUB dataset Zip file before upload. It checks a dataset for errors before it is uploaded + to the HUB. Usage examples are given below. Args: path (str, optional): Path to data.zip (with data.yaml inside data.zip). Defaults to ''. diff --git a/ultralytics/hub/auth.py b/ultralytics/hub/auth.py index 9963d79c09..deea9a3219 100644 --- a/ultralytics/hub/auth.py +++ b/ultralytics/hub/auth.py @@ -9,6 +9,19 @@ API_KEY_URL = f'{HUB_WEB_ROOT}/settings?tab=api+keys' class Auth: + """ + Manages authentication processes including API key handling, cookie-based authentication, and header generation. + + The class supports different methods of authentication: + 1. Directly using an API key. + 2. Authenticating using browser cookies (specifically in Google Colab). + 3. Prompting the user to enter an API key. + + Attributes: + id_token (str or bool): Token used for identity verification, initialized as False. + api_key (str or bool): API key for authentication, initialized as False. + model_key (bool): Placeholder for model key, initialized as False. + """ id_token = api_key = model_key = False def __init__(self, api_key='', verbose=False): @@ -54,7 +67,9 @@ class Auth: def request_api_key(self, max_attempts=3): """ - Prompt the user to input their API key. Returns the model ID. + Prompt the user to input their API key. + + Returns the model ID. """ import getpass for attempts in range(max_attempts): @@ -86,8 +101,8 @@ class Auth: def auth_with_cookies(self) -> bool: """ - Attempt to fetch authentication via cookies and set id_token. 
- User must be logged in to HUB and running in a supported browser. + Attempt to fetch authentication via cookies and set id_token. User must be logged in to HUB and running in a + supported browser. Returns: bool: True if authentication is successful, False otherwise. diff --git a/ultralytics/hub/session.py b/ultralytics/hub/session.py index 57c55f80dc..9870ca16d1 100644 --- a/ultralytics/hub/session.py +++ b/ultralytics/hub/session.py @@ -84,6 +84,7 @@ class HUBTrainingSession: def _handle_signal(self, signum, frame): """ Handle kill signals and prevent heartbeats from being sent on Colab after termination. + This method does not use frame, it is included as it is passed by signal. """ if self.alive is True: diff --git a/ultralytics/hub/utils.py b/ultralytics/hub/utils.py index 3ca954e67f..f2621d7a90 100644 --- a/ultralytics/hub/utils.py +++ b/ultralytics/hub/utils.py @@ -161,9 +161,7 @@ class Events: url = 'https://www.google-analytics.com/mp/collect?measurement_id=G-X8NCJYTQXM&api_secret=QLQrATrNSwGRFRLE-cbHJw' def __init__(self): - """ - Initializes the Events object with default values for events, rate_limit, and metadata. - """ + """Initializes the Events object with default values for events, rate_limit, and metadata.""" self.events = [] # events list self.rate_limit = 60.0 # rate limit (seconds) self.t = 0.0 # rate limit timer (seconds) diff --git a/ultralytics/models/fastsam/model.py b/ultralytics/models/fastsam/model.py index c1895fc626..e6475faae0 100644 --- a/ultralytics/models/fastsam/model.py +++ b/ultralytics/models/fastsam/model.py @@ -22,7 +22,7 @@ class FastSAM(Model): """ def __init__(self, model='FastSAM-x.pt'): - """Call the __init__ method of the parent class (YOLO) with the updated default model""" + """Call the __init__ method of the parent class (YOLO) with the updated default model.""" if str(model) == 'FastSAM.pt': model = 'FastSAM-x.pt' assert Path(model).suffix not in ('.yaml', '.yml'), 'FastSAM models only support pre-trained models.' 
@@ -30,4 +30,5 @@ class FastSAM(Model):

    @property
    def task_map(self):
+        """Returns a dictionary mapping segment task to corresponding predictor and validator classes."""
        return {'segment': {'predictor': FastSAMPredictor, 'validator': FastSAMValidator}}
diff --git a/ultralytics/models/fastsam/predict.py b/ultralytics/models/fastsam/predict.py
index 4eac69f99a..b64d2d6e64 100644
--- a/ultralytics/models/fastsam/predict.py
+++ b/ultralytics/models/fastsam/predict.py
@@ -11,10 +11,12 @@ from ultralytics.utils import DEFAULT_CFG, ops

class FastSAMPredictor(DetectionPredictor):

    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+        """Initializes FastSAMPredictor class by inheriting from DetectionPredictor and setting task to 'segment'."""
        super().__init__(cfg, overrides, _callbacks)
        self.args.task = 'segment'

    def postprocess(self, preds, img, orig_imgs):
+        """Postprocesses the predictions, applies non-max suppression, scales the boxes, and returns the results."""
        p = ops.non_max_suppression(
            preds[0],
            self.args.conf,
diff --git a/ultralytics/models/fastsam/prompt.py b/ultralytics/models/fastsam/prompt.py
index 97ab46c3c8..5eb581e9fb 100644
--- a/ultralytics/models/fastsam/prompt.py
+++ b/ultralytics/models/fastsam/prompt.py
@@ -15,6 +15,7 @@ from ultralytics.utils import TQDM

class FastSAMPrompt:

    def __init__(self, source, results, device='cuda') -> None:
+        """Initializes FastSAMPrompt with given source, results and device, and imports the CLIP model for text prompts."""
        self.device = device
        self.results = results
        self.source = source
@@ -30,6 +31,7 @@ class FastSAMPrompt:

    @staticmethod
    def _segment_image(image, bbox):
+        """Segments the given image according to the provided bounding box coordinates."""
        image_array = np.array(image)
        segmented_image_array = np.zeros_like(image_array)
        x1, y1, x2, y2 = bbox
@@ -45,6 +47,9 @@ class FastSAMPrompt:

    @staticmethod
    def _format_results(result, filter=0):
+        """Formats detection results into list of annotations each containing ID, segmentation, bounding box, score and
+        area.
+        """
        annotations = []
        n = len(result.masks.data) if result.masks is not None else 0
        for i in range(n):
@@ -61,6 +66,9 @@ class FastSAMPrompt:

    @staticmethod
    def _get_bbox_from_mask(mask):
+        """Computes a bounding box [x1, y1, x2, y2] from the mask's contours, merging multiple contours into one enclosing
+        box.
+ """ mask = mask.astype(np.uint8) contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) x1, y1, w, h = cv2.boundingRect(contours[0]) @@ -195,6 +203,7 @@ class FastSAMPrompt: @torch.no_grad() def retrieve(self, model, preprocess, elements, search_text: str, device) -> int: + """Processes images and text with a model, calculates similarity, and returns softmax score.""" preprocessed_images = [preprocess(image).to(device) for image in elements] tokenized_text = self.clip.tokenize([search_text]).to(device) stacked_images = torch.stack(preprocessed_images) @@ -206,6 +215,7 @@ class FastSAMPrompt: return probs[:, 0].softmax(dim=0) def _crop_image(self, format_results): + """Crops an image based on provided annotation format and returns cropped images and related data.""" if os.path.isdir(self.source): raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.") image = Image.fromarray(cv2.cvtColor(self.results[0].orig_img, cv2.COLOR_BGR2RGB)) @@ -229,6 +239,7 @@ class FastSAMPrompt: return cropped_boxes, cropped_images, not_crop, filter_id, annotations def box_prompt(self, bbox): + """Modifies the bounding box properties and calculates IoU between masks and bounding box.""" if self.results[0].masks is not None: assert (bbox[2] != 0 and bbox[3] != 0) if os.path.isdir(self.source): @@ -261,7 +272,8 @@ class FastSAMPrompt: self.results[0].masks.data = torch.tensor(np.array([masks[max_iou_index].cpu().numpy()])) return self.results - def point_prompt(self, points, pointlabel): # numpy 处理 + def point_prompt(self, points, pointlabel): # numpy + """Adjusts points on detected masks based on user input and returns the modified results.""" if self.results[0].masks is not None: if os.path.isdir(self.source): raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.") @@ -284,6 +296,7 @@ class FastSAMPrompt: return self.results def text_prompt(self, text): + """Processes a text prompt, applies it to existing results and returns the updated results.""" if self.results[0].masks is not None: format_results = self._format_results(self.results[0], 0) cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results) @@ -296,4 +309,5 @@ class FastSAMPrompt: return self.results def everything_prompt(self): + """Returns the processed results from the previous methods in the class.""" return self.results diff --git a/ultralytics/models/nas/model.py b/ultralytics/models/nas/model.py index f848cc4bff..9a770c4c66 100644 --- a/ultralytics/models/nas/model.py +++ b/ultralytics/models/nas/model.py @@ -25,12 +25,13 @@ from .val import NASValidator class NAS(Model): def __init__(self, model='yolo_nas_s.pt') -> None: + """Initializes the NAS model with the provided or default 'yolo_nas_s.pt' model.""" assert Path(model).suffix not in ('.yaml', '.yml'), 'YOLO-NAS models only support pre-trained models.' 
super().__init__(model, task='detect') @smart_inference_mode() def _load(self, weights: str, task: str): - # Load or create new NAS model + """Loads an existing NAS model weights or creates a new NAS model with pretrained weights if not provided.""" import super_gradients suffix = Path(weights).suffix if suffix == '.pt': @@ -58,4 +59,5 @@ class NAS(Model): @property def task_map(self): + """Returns a dictionary mapping tasks to respective predictor and validator classes.""" return {'detect': {'predictor': NASPredictor, 'validator': NASValidator}} diff --git a/ultralytics/models/rtdetr/model.py b/ultralytics/models/rtdetr/model.py index c20d72f643..fa7d484e72 100644 --- a/ultralytics/models/rtdetr/model.py +++ b/ultralytics/models/rtdetr/model.py @@ -1,7 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -""" -RT-DETR model interface -""" +"""RT-DETR model interface.""" from ultralytics.engine.model import Model from ultralytics.nn.tasks import RTDETRDetectionModel @@ -11,17 +9,17 @@ from .val import RTDETRValidator class RTDETR(Model): - """ - RTDETR model interface. - """ + """RTDETR model interface.""" def __init__(self, model='rtdetr-l.pt') -> None: + """Initializes the RTDETR model with the given model file, defaulting to 'rtdetr-l.pt'.""" if model and model.split('.')[-1] not in ('pt', 'yaml', 'yml'): raise NotImplementedError('RT-DETR only supports creating from *.pt file or *.yaml file.') super().__init__(model=model, task='detect') @property def task_map(self): + """Returns a dictionary mapping task names to corresponding Ultralytics task classes for RTDETR model.""" return { 'detect': { 'predictor': RTDETRPredictor, diff --git a/ultralytics/models/rtdetr/predict.py b/ultralytics/models/rtdetr/predict.py index 33d5d7a263..1a2b0cbc42 100644 --- a/ultralytics/models/rtdetr/predict.py +++ b/ultralytics/models/rtdetr/predict.py @@ -48,7 +48,8 @@ class RTDETRPredictor(BasePredictor): return results def pre_transform(self, im): - """Pre-transform input image before inference. + """ + Pre-transform input image before inference. Args: im (List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list. diff --git a/ultralytics/models/rtdetr/train.py b/ultralytics/models/rtdetr/train.py index 1e586683d0..91d4729e36 100644 --- a/ultralytics/models/rtdetr/train.py +++ b/ultralytics/models/rtdetr/train.py @@ -37,7 +37,8 @@ class RTDETRTrainer(DetectionTrainer): return model def build_dataset(self, img_path, mode='val', batch=None): - """Build RTDETR Dataset + """ + Build RTDETR Dataset. Args: img_path (str): Path to the folder containing images. 
diff --git a/ultralytics/models/rtdetr/val.py b/ultralytics/models/rtdetr/val.py index 9b984bed21..d8e5fb6984 100644 --- a/ultralytics/models/rtdetr/val.py +++ b/ultralytics/models/rtdetr/val.py @@ -16,6 +16,7 @@ __all__ = 'RTDETRValidator', # tuple or list class RTDETRDataset(YOLODataset): def __init__(self, *args, data=None, **kwargs): + """Initialize the RTDETRDataset class by inheriting from the YOLODataset class.""" super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs) # NOTE: add stretch version load_image for rtdetr mosaic diff --git a/ultralytics/models/sam/amg.py b/ultralytics/models/sam/amg.py index f251fe4e07..d7751d6f25 100644 --- a/ultralytics/models/sam/amg.py +++ b/ultralytics/models/sam/amg.py @@ -32,9 +32,10 @@ def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]: def calculate_stability_score(masks: torch.Tensor, mask_threshold: float, threshold_offset: float) -> torch.Tensor: """ - Computes the stability score for a batch of masks. The stability - score is the IoU between the binary masks obtained by thresholding - the predicted mask logits at high and low values. + Computes the stability score for a batch of masks. + + The stability score is the IoU between the binary masks obtained by thresholding the predicted mask logits at high + and low values. """ # One mask is always contained inside the other. # Save memory by preventing unnecessary cast to torch.int64 @@ -60,7 +61,11 @@ def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer: def generate_crop_boxes(im_size: Tuple[int, ...], n_layers: int, overlap_ratio: float) -> Tuple[List[List[int]], List[int]]: - """Generates a list of crop boxes of different sizes. Each layer has (2**i)**2 boxes for the ith layer.""" + """ + Generates a list of crop boxes of different sizes. + + Each layer has (2**i)**2 boxes for the ith layer. + """ crop_boxes, layer_idxs = [], [] im_h, im_w = im_size short_side = min(im_h, im_w) @@ -145,8 +150,9 @@ def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tup def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor: """ - Calculates boxes in XYXY format around masks. Return [0,0,0,0] for - an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4. + Calculates boxes in XYXY format around masks. + + Return [0,0,0,0] for an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4. """ # torch.max below raises an error on empty inputs, just skip in this case if torch.numel(masks) == 0: diff --git a/ultralytics/models/sam/model.py b/ultralytics/models/sam/model.py index 2ca35011f7..8a140b3f0b 100644 --- a/ultralytics/models/sam/model.py +++ b/ultralytics/models/sam/model.py @@ -1,7 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -""" -SAM model interface -""" +"""SAM model interface.""" from pathlib import Path @@ -13,16 +11,16 @@ from .predict import Predictor class SAM(Model): - """ - SAM model interface. 
- """ + """SAM model interface.""" def __init__(self, model='sam_b.pt') -> None: + """Initializes the SAM model instance with the specified pre-trained model file.""" if model and Path(model).suffix not in ('.pt', '.pth'): raise NotImplementedError('SAM prediction requires pre-trained *.pt or *.pth model.') super().__init__(model=model, task='segment') def _load(self, weights: str, task=None): + """Loads the provided weights into the SAM model.""" self.model = build_sam(weights) def predict(self, source, stream=False, bboxes=None, points=None, labels=None, **kwargs): @@ -48,4 +46,5 @@ class SAM(Model): @property def task_map(self): + """Returns a dictionary mapping the 'segment' task to its corresponding 'Predictor'.""" return {'segment': {'predictor': Predictor}} diff --git a/ultralytics/models/sam/modules/decoders.py b/ultralytics/models/sam/modules/decoders.py index 0c64a7e424..a9a3a319e2 100644 --- a/ultralytics/models/sam/modules/decoders.py +++ b/ultralytics/models/sam/modules/decoders.py @@ -98,7 +98,11 @@ class MaskDecoder(nn.Module): sparse_prompt_embeddings: torch.Tensor, dense_prompt_embeddings: torch.Tensor, ) -> Tuple[torch.Tensor, torch.Tensor]: - """Predicts masks. See 'forward' for more details.""" + """ + Predicts masks. + + See 'forward' for more details. + """ # Concatenate output tokens output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0) output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.size(0), -1, -1) diff --git a/ultralytics/models/sam/modules/encoders.py b/ultralytics/models/sam/modules/encoders.py index eb9352f970..b4c0774457 100644 --- a/ultralytics/models/sam/modules/encoders.py +++ b/ultralytics/models/sam/modules/encoders.py @@ -100,6 +100,9 @@ class ImageEncoderViT(nn.Module): ) def forward(self, x: torch.Tensor) -> torch.Tensor: + """Processes input through patch embedding, applies positional embedding if present, and passes through blocks + and neck. + """ x = self.patch_embed(x) if self.pos_embed is not None: x = x + self.pos_embed @@ -157,8 +160,8 @@ class PromptEncoder(nn.Module): def get_dense_pe(self) -> torch.Tensor: """ - Returns the positional encoding used to encode point prompts, - applied to a dense set of points the shape of the image encoding. + Returns the positional encoding used to encode point prompts, applied to a dense set of points the shape of the + image encoding. Returns: torch.Tensor: Positional encoding with shape 1x(embed_dim)x(embedding_h)x(embedding_w) @@ -204,9 +207,7 @@ class PromptEncoder(nn.Module): boxes: Optional[torch.Tensor], masks: Optional[torch.Tensor], ) -> int: - """ - Gets the batch size of the output given the batch size of the input prompts. - """ + """Gets the batch size of the output given the batch size of the input prompts.""" if points is not None: return points[0].shape[0] elif boxes is not None: @@ -217,6 +218,7 @@ class PromptEncoder(nn.Module): return 1 def _get_device(self) -> torch.device: + """Returns the device of the first point embedding's weight tensor.""" return self.point_embeddings[0].weight.device def forward( @@ -259,11 +261,10 @@ class PromptEncoder(nn.Module): class PositionEmbeddingRandom(nn.Module): - """ - Positional encoding using random spatial frequencies. 
- """ + """Positional encoding using random spatial frequencies.""" def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = None) -> None: + """Initializes a position embedding using random spatial frequencies.""" super().__init__() if scale is None or scale <= 0.0: scale = 1.0 @@ -304,7 +305,7 @@ class PositionEmbeddingRandom(nn.Module): class Block(nn.Module): - """Transformer blocks with support of window attention and residual propagation blocks""" + """Transformer blocks with support of window attention and residual propagation blocks.""" def __init__( self, @@ -351,6 +352,7 @@ class Block(nn.Module): self.window_size = window_size def forward(self, x: torch.Tensor) -> torch.Tensor: + """Executes a forward pass through the transformer block with window attention and non-overlapping windows.""" shortcut = x x = self.norm1(x) # Window partition @@ -404,6 +406,7 @@ class Attention(nn.Module): self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim)) def forward(self, x: torch.Tensor) -> torch.Tensor: + """Applies the forward operation including attention, normalization, MLP, and indexing within window limits.""" B, H, W, _ = x.shape # qkv with shape (3, B, nHead, H * W, C) qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) @@ -448,6 +451,7 @@ def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw: Tuple[in hw: Tuple[int, int]) -> torch.Tensor: """ Window unpartition into original sequences and removing padding. + Args: windows (tensor): input tokens with [B * num_windows, window_size, window_size, C]. window_size (int): window size. @@ -540,9 +544,7 @@ def add_decomposed_rel_pos( class PatchEmbed(nn.Module): - """ - Image to Patch Embedding. - """ + """Image to Patch Embedding.""" def __init__( self, @@ -565,4 +567,5 @@ class PatchEmbed(nn.Module): self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding) def forward(self, x: torch.Tensor) -> torch.Tensor: + """Computes patch embedding by applying convolution and transposing resulting tensor.""" return self.proj(x).permute(0, 2, 3, 1) # B C H W -> B H W C diff --git a/ultralytics/models/sam/modules/tiny_encoder.py b/ultralytics/models/sam/modules/tiny_encoder.py index ca8de50b75..d96b303220 100644 --- a/ultralytics/models/sam/modules/tiny_encoder.py +++ b/ultralytics/models/sam/modules/tiny_encoder.py @@ -23,6 +23,9 @@ from ultralytics.utils.instance import to_2tuple class Conv2d_BN(torch.nn.Sequential): def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1): + """Initializes the MBConv model with given input channels, output channels, expansion ratio, activation, and + drop path. + """ super().__init__() self.add_module('c', torch.nn.Conv2d(a, b, ks, stride, pad, dilation, groups, bias=False)) bn = torch.nn.BatchNorm2d(b) @@ -34,6 +37,9 @@ class Conv2d_BN(torch.nn.Sequential): class PatchEmbed(nn.Module): def __init__(self, in_chans, embed_dim, resolution, activation): + """Initialize the PatchMerging class with specified input, output dimensions, resolution and activation + function. 
+ """ super().__init__() img_size: Tuple[int, int] = to_2tuple(resolution) self.patches_resolution = (img_size[0] // 4, img_size[1] // 4) @@ -48,12 +54,16 @@ class PatchEmbed(nn.Module): ) def forward(self, x): + """Runs input tensor 'x' through the PatchMerging model's sequence of operations.""" return self.seq(x) class MBConv(nn.Module): def __init__(self, in_chans, out_chans, expand_ratio, activation, drop_path): + """Initializes a convolutional layer with specified dimensions, input resolution, depth, and activation + function. + """ super().__init__() self.in_chans = in_chans self.hidden_chans = int(in_chans * expand_ratio) @@ -73,6 +83,7 @@ class MBConv(nn.Module): self.drop_path = nn.Identity() def forward(self, x): + """Implements the forward pass for the model architecture.""" shortcut = x x = self.conv1(x) x = self.act1(x) @@ -87,6 +98,9 @@ class MBConv(nn.Module): class PatchMerging(nn.Module): def __init__(self, input_resolution, dim, out_dim, activation): + """Initializes the ConvLayer with specific dimension, input resolution, depth, activation, drop path, and other + optional parameters. + """ super().__init__() self.input_resolution = input_resolution @@ -99,6 +113,7 @@ class PatchMerging(nn.Module): self.conv3 = Conv2d_BN(out_dim, out_dim, 1, 1, 0) def forward(self, x): + """Applies forward pass on the input utilizing convolution and activation layers, and returns the result.""" if x.ndim == 3: H, W = self.input_resolution B = len(x) @@ -149,6 +164,7 @@ class ConvLayer(nn.Module): input_resolution, dim=dim, out_dim=out_dim, activation=activation) def forward(self, x): + """Processes the input through a series of convolutional layers and returns the activated output.""" for blk in self.blocks: x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x) return x if self.downsample is None else self.downsample(x) @@ -157,6 +173,7 @@ class ConvLayer(nn.Module): class Mlp(nn.Module): def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + """Initializes Attention module with the given parameters including dimension, key_dim, number of heads, etc.""" super().__init__() out_features = out_features or in_features hidden_features = hidden_features or in_features @@ -167,6 +184,7 @@ class Mlp(nn.Module): self.drop = nn.Dropout(drop) def forward(self, x): + """Applies operations on input x and returns modified x, runs downsample if not None.""" x = self.norm(x) x = self.fc1(x) x = self.act(x) @@ -216,6 +234,7 @@ class Attention(torch.nn.Module): @torch.no_grad() def train(self, mode=True): + """Sets the module in training mode and handles attribute 'ab' based on the mode.""" super().train(mode) if mode and hasattr(self, 'ab'): del self.ab @@ -298,6 +317,9 @@ class TinyViTBlock(nn.Module): self.local_conv = Conv2d_BN(dim, dim, ks=local_conv_size, stride=1, pad=pad, groups=dim) def forward(self, x): + """Applies attention-based transformation or padding to input 'x' before passing it through a local + convolution. + """ H, W = self.input_resolution B, L, C = x.shape assert L == H * W, 'input feature has wrong size' @@ -337,6 +359,9 @@ class TinyViTBlock(nn.Module): return x + self.drop_path(self.mlp(x)) def extra_repr(self) -> str: + """Returns a formatted string representing the TinyViTBlock's parameters: dimension, input resolution, number of + attentions heads, window size, and MLP ratio. 
+ """ return f'dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, ' \ f'window_size={self.window_size}, mlp_ratio={self.mlp_ratio}' @@ -402,23 +427,28 @@ class BasicLayer(nn.Module): input_resolution, dim=dim, out_dim=out_dim, activation=activation) def forward(self, x): + """Performs forward propagation on the input tensor and returns a normalized tensor.""" for blk in self.blocks: x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x) return x if self.downsample is None else self.downsample(x) def extra_repr(self) -> str: + """Returns a string representation of the extra_repr function with the layer's parameters.""" return f'dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}' class LayerNorm2d(nn.Module): + """A PyTorch implementation of Layer Normalization in 2D.""" def __init__(self, num_channels: int, eps: float = 1e-6) -> None: + """Initialize LayerNorm2d with the number of channels and an optional epsilon.""" super().__init__() self.weight = nn.Parameter(torch.ones(num_channels)) self.bias = nn.Parameter(torch.zeros(num_channels)) self.eps = eps def forward(self, x: torch.Tensor) -> torch.Tensor: + """Perform a forward pass, normalizing the input tensor.""" u = x.mean(1, keepdim=True) s = (x - u).pow(2).mean(1, keepdim=True) x = (x - u) / torch.sqrt(s + self.eps) @@ -518,6 +548,7 @@ class TinyViT(nn.Module): ) def set_layer_lr_decay(self, layer_lr_decay): + """Sets the learning rate decay for each layer in the TinyViT model.""" decay_rate = layer_lr_decay # layers -> blocks (depth) @@ -525,6 +556,7 @@ class TinyViT(nn.Module): lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)] def _set_lr_scale(m, scale): + """Sets the learning rate scale for each layer in the model based on the layer's depth.""" for p in m.parameters(): p.lr_scale = scale @@ -544,12 +576,14 @@ class TinyViT(nn.Module): p.param_name = k def _check_lr_scale(m): + """Checks if the learning rate scale attribute is present in module's parameters.""" for p in m.parameters(): assert hasattr(p, 'lr_scale'), p.param_name self.apply(_check_lr_scale) def _init_weights(self, m): + """Initializes weights for linear layers and layer normalization in the given module.""" if isinstance(m, nn.Linear): # NOTE: This initialization is needed only for training. # trunc_normal_(m.weight, std=.02) @@ -561,11 +595,12 @@ class TinyViT(nn.Module): @torch.jit.ignore def no_weight_decay_keywords(self): + """Returns a dictionary of parameter names where weight decay should not be applied.""" return {'attention_biases'} def forward_features(self, x): - # x: (N, C, H, W) - x = self.patch_embed(x) + """Runs the input through the model layers and returns the transformed output.""" + x = self.patch_embed(x) # x input is (N, C, H, W) x = self.layers[0](x) start_i = 1 @@ -579,4 +614,5 @@ class TinyViT(nn.Module): return self.neck(x) def forward(self, x): + """Executes a forward pass on the input tensor through the constructed model layers.""" return self.forward_features(x) diff --git a/ultralytics/models/sam/modules/transformer.py b/ultralytics/models/sam/modules/transformer.py index f925538b6b..95a04666a0 100644 --- a/ultralytics/models/sam/modules/transformer.py +++ b/ultralytics/models/sam/modules/transformer.py @@ -21,8 +21,7 @@ class TwoWayTransformer(nn.Module): attention_downsample_rate: int = 2, ) -> None: """ - A transformer decoder that attends to an input image using - queries whose positional embedding is supplied. 
+ A transformer decoder that attends to an input image using queries whose positional embedding is supplied. Args: depth (int): number of layers in the transformer @@ -171,8 +170,7 @@ class TwoWayAttentionBlock(nn.Module): class Attention(nn.Module): - """ - An attention layer that allows for downscaling the size of the embedding after projection to queries, keys, and + """An attention layer that allows for downscaling the size of the embedding after projection to queries, keys, and values. """ diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py index e8a8197b7b..31e0da93ab 100644 --- a/ultralytics/models/sam/predict.py +++ b/ultralytics/models/sam/predict.py @@ -19,6 +19,7 @@ from .build import build_sam class Predictor(BasePredictor): def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): + """Initializes the Predictor class with default or provided configuration, overrides, and callbacks.""" if overrides is None: overrides = {} overrides.update(dict(task='segment', mode='predict', imgsz=1024)) @@ -34,7 +35,8 @@ class Predictor(BasePredictor): self.segment_all = False def preprocess(self, im): - """Prepares input image before inference. + """ + Prepares input image before inference. Args: im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list. @@ -189,7 +191,8 @@ class Predictor(BasePredictor): stability_score_thresh=0.95, stability_score_offset=0.95, crop_nms_thresh=0.7): - """Segment the whole image. + """ + Segment the whole image. Args: im (torch.Tensor): The preprocessed image, (N, C, H, W). @@ -360,14 +363,15 @@ class Predictor(BasePredictor): self.prompts = prompts def reset_image(self): + """Resets the image and its features to None.""" self.im = None self.features = None @staticmethod def remove_small_regions(masks, min_area=0, nms_thresh=0.7): """ - Removes small disconnected regions and holes in masks, then reruns - box NMS to remove any new duplicates. Requires open-cv as a dependency. + Removes small disconnected regions and holes in masks, then reruns box NMS to remove any new duplicates. + Requires open-cv as a dependency. Args: masks (torch.Tensor): Masks, (N, H, W). diff --git a/ultralytics/models/utils/loss.py b/ultralytics/models/utils/loss.py index 95406e1f14..77eadce71c 100644 --- a/ultralytics/models/utils/loss.py +++ b/ultralytics/models/utils/loss.py @@ -47,6 +47,7 @@ class DETRLoss(nn.Module): self.device = None def _get_loss_class(self, pred_scores, targets, gt_scores, num_gts, postfix=''): + """Computes the classification loss based on predictions, target values, and ground truth scores.""" # logits: [b, query, num_classes], gt_class: list[[n, 1]] name_class = f'loss_class{postfix}' bs, nq = pred_scores.shape[:2] @@ -68,6 +69,9 @@ class DETRLoss(nn.Module): return {name_class: loss_cls.squeeze() * self.loss_gain['class']} def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=''): + """Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding + boxes. 
+ """ # boxes: [b, query, 4], gt_bbox: list[[n, 4]] name_bbox = f'loss_bbox{postfix}' name_giou = f'loss_giou{postfix}' @@ -125,7 +129,7 @@ class DETRLoss(nn.Module): postfix='', masks=None, gt_mask=None): - """Get auxiliary losses""" + """Get auxiliary losses.""" # NOTE: loss class, bbox, giou, mask, dice loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device) if match_indices is None and self.use_uni_match: @@ -166,12 +170,14 @@ class DETRLoss(nn.Module): @staticmethod def _get_index(match_indices): + """Returns batch indices, source indices, and destination indices from provided match indices.""" batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(match_indices)]) src_idx = torch.cat([src for (src, _) in match_indices]) dst_idx = torch.cat([dst for (_, dst) in match_indices]) return (batch_idx, src_idx), dst_idx def _get_assigned_bboxes(self, pred_bboxes, gt_bboxes, match_indices): + """Assigns predicted bounding boxes to ground truth bounding boxes based on the match indices.""" pred_assigned = torch.cat([ t[I] if len(I) > 0 else torch.zeros(0, t.shape[-1], device=self.device) for t, (I, _) in zip(pred_bboxes, match_indices)]) @@ -190,7 +196,7 @@ class DETRLoss(nn.Module): gt_mask=None, postfix='', match_indices=None): - """Get losses""" + """Get losses.""" if match_indices is None: match_indices = self.matcher(pred_bboxes, pred_scores, @@ -250,22 +256,43 @@ class DETRLoss(nn.Module): class RTDETRDetectionLoss(DETRLoss): + """ + Real-Time DeepTracker (RT-DETR) Detection Loss class that extends the DETRLoss. + + This class computes the detection loss for the RT-DETR model, which includes the standard detection loss as well as + an additional denoising training loss when provided with denoising metadata. + """ def forward(self, preds, batch, dn_bboxes=None, dn_scores=None, dn_meta=None): + """ + Forward pass to compute the detection loss. + + Args: + preds (tuple): Predicted bounding boxes and scores. + batch (dict): Batch data containing ground truth information. + dn_bboxes (torch.Tensor, optional): Denoising bounding boxes. Default is None. + dn_scores (torch.Tensor, optional): Denoising scores. Default is None. + dn_meta (dict, optional): Metadata for denoising. Default is None. + + Returns: + (dict): Dictionary containing the total loss and, if applicable, the denoising loss. + """ pred_bboxes, pred_scores = preds total_loss = super().forward(pred_bboxes, pred_scores, batch) + # Check for denoising metadata to compute denoising training loss if dn_meta is not None: dn_pos_idx, dn_num_group = dn_meta['dn_pos_idx'], dn_meta['dn_num_group'] assert len(batch['gt_groups']) == len(dn_pos_idx) - # Denoising match indices + # Get the match indices for denoising match_indices = self.get_dn_match_indices(dn_pos_idx, dn_num_group, batch['gt_groups']) - # Compute denoising training loss + # Compute the denoising training loss dn_loss = super().forward(dn_bboxes, dn_scores, batch, postfix='_dn', match_indices=match_indices) total_loss.update(dn_loss) else: + # If no denoising metadata is provided, set denoising loss to zero total_loss.update({f'{k}_dn': torch.tensor(0., device=self.device) for k in total_loss.keys()}) return total_loss @@ -276,12 +303,12 @@ class RTDETRDetectionLoss(DETRLoss): Get the match indices for denoising. Args: - dn_pos_idx (List[torch.Tensor]): A list includes positive indices of denoising. - dn_num_group (int): The number of groups of denoising. 
- gt_groups (List(int)): a list of batch size length includes the number of gts of each image. + dn_pos_idx (List[torch.Tensor]): List of tensors containing positive indices for denoising. + dn_num_group (int): Number of denoising groups. + gt_groups (List[int]): List of integers representing the number of ground truths for each image. Returns: - dn_match_indices (List(tuple)): Matched indices. + (List[tuple]): List of tuples containing matched indices for denoising. """ dn_match_indices = [] idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0) diff --git a/ultralytics/models/utils/ops.py b/ultralytics/models/utils/ops.py index abce97a692..99357d19fd 100644 --- a/ultralytics/models/utils/ops.py +++ b/ultralytics/models/utils/ops.py @@ -11,8 +11,8 @@ from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh class HungarianMatcher(nn.Module): """ - A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in - an end-to-end fashion. + A module implementing the HungarianMatcher, which is a differentiable module to solve the assignment problem in an + end-to-end fashion. HungarianMatcher performs optimal assignment over the predicted and ground truth bounding boxes using a cost function that considers classification scores, bounding box coordinates, and optionally, mask predictions. @@ -32,6 +32,9 @@ class HungarianMatcher(nn.Module): """ def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0): + """Initializes HungarianMatcher with cost coefficients, Focal Loss, mask prediction, sample points, and alpha + gamma factors. + """ super().__init__() if cost_gain is None: cost_gain = {'class': 1, 'bbox': 5, 'giou': 2, 'mask': 1, 'dice': 1} @@ -45,8 +48,8 @@ class HungarianMatcher(nn.Module): def forward(self, pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups, masks=None, gt_mask=None): """ Forward pass for HungarianMatcher. This function computes costs based on prediction and ground truth - (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching - between predictions and ground truth based on these costs. + (classification cost, L1 cost between boxes and GIoU cost between boxes) and finds the optimal matching between + predictions and ground truth based on these costs. Args: pred_bboxes (Tensor): Predicted bounding boxes with shape [batch_size, num_queries, 4]. @@ -153,9 +156,9 @@ def get_cdn_group(batch, box_noise_scale=1.0, training=False): """ - Get contrastive denoising training group. This function creates a contrastive denoising training group with - positive and negative samples from the ground truths (gt). It applies noise to the class labels and bounding - box coordinates, and returns the modified labels, bounding boxes, attention mask and meta information. + Get contrastive denoising training group. This function creates a contrastive denoising training group with positive + and negative samples from the ground truths (gt). It applies noise to the class labels and bounding box coordinates, + and returns the modified labels, bounding boxes, attention mask and meta information. Args: batch (dict): A dict that includes 'gt_cls' (torch.Tensor with shape [num_gts, ]), 'gt_bboxes' @@ -191,12 +194,12 @@ def get_cdn_group(batch, gt_bbox = batch['bboxes'] # bs*num, 4 b_idx = batch['batch_idx'] - # each group has positive and negative queries. + # Each group has positive and negative queries. 
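A self-contained sketch of driving the HungarianMatcher described above with random tensors; the shapes follow the documented forward() signature:

```python
import torch

from ultralytics.models.utils.ops import HungarianMatcher

matcher = HungarianMatcher(cost_gain={'class': 2, 'bbox': 5, 'giou': 2})
pred_bboxes = torch.rand(2, 10, 4)    # [batch_size, num_queries, 4], normalized xywh
pred_scores = torch.rand(2, 10, 80)   # [batch_size, num_queries, num_classes]
gt_bboxes = torch.rand(5, 4)          # all ground truths in the batch
gt_cls = torch.randint(0, 80, (5,))
gt_groups = [2, 3]                    # ground truths per image; sums to 5
indices = matcher(pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups)
print(indices)  # one (pred_idx, gt_idx) index tensor pair per image
```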
dn_cls = gt_cls.repeat(2 * num_group) # (2*num_group*bs*num, ) dn_bbox = gt_bbox.repeat(2 * num_group, 1) # 2*num_group*bs*num, 4 dn_b_idx = b_idx.repeat(2 * num_group).view(-1) # (2*num_group*bs*num, ) - # positive and negative mask + # Positive and negative mask # (bs*num*num_group, ), the second total_num*num_group part as negative samples neg_idx = torch.arange(total_num * num_group, dtype=torch.long, device=gt_bbox.device) + num_group * total_num @@ -220,10 +223,9 @@ def get_cdn_group(batch, known_bbox += rand_part * diff known_bbox.clip_(min=0.0, max=1.0) dn_bbox = xyxy2xywh(known_bbox) - dn_bbox = inverse_sigmoid(dn_bbox) + dn_bbox = torch.logit(dn_bbox, eps=1e-6) # inverse sigmoid - # total denoising queries - num_dn = int(max_nums * 2 * num_group) + num_dn = int(max_nums * 2 * num_group) # total denoising queries # class_embed = torch.cat([class_embed, torch.zeros([1, class_embed.shape[-1]], device=class_embed.device)]) dn_cls_embed = class_embed[dn_cls] # bs*num * 2 * num_group, 256 padding_cls = torch.zeros(bs, num_dn, dn_cls_embed.shape[-1], device=gt_cls.device) @@ -256,9 +258,3 @@ def get_cdn_group(batch, return padding_cls.to(class_embed.device), padding_bbox.to(class_embed.device), attn_mask.to( class_embed.device), dn_meta - - -def inverse_sigmoid(x, eps=1e-6): - """Inverse sigmoid function.""" - x = x.clip(min=0., max=1.) - return torch.log(x / (1 - x + eps) + eps) diff --git a/ultralytics/models/yolo/classify/predict.py b/ultralytics/models/yolo/classify/predict.py index a22616e574..ca463b67f4 100644 --- a/ultralytics/models/yolo/classify/predict.py +++ b/ultralytics/models/yolo/classify/predict.py @@ -26,6 +26,7 @@ class ClassificationPredictor(BasePredictor): """ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): + """Initializes ClassificationPredictor setting the task to 'classify'.""" super().__init__(cfg, overrides, _callbacks) self.args.task = 'classify' diff --git a/ultralytics/models/yolo/classify/train.py b/ultralytics/models/yolo/classify/train.py index 0829f05b83..c59f285355 100644 --- a/ultralytics/models/yolo/classify/train.py +++ b/ultralytics/models/yolo/classify/train.py @@ -79,6 +79,7 @@ class ClassificationTrainer(BaseTrainer): return ckpt def build_dataset(self, img_path, mode='train', batch=None): + """Creates a ClassificationDataset instance given an image path, and mode (train/test etc.).""" return ClassificationDataset(root=img_path, args=self.args, augment=mode == 'train', prefix=mode) def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode='train'): @@ -113,8 +114,9 @@ class ClassificationTrainer(BaseTrainer): def label_loss_items(self, loss_items=None, prefix='train'): """ - Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for - segmentation & detection + Returns a loss dict with labelled training loss items tensor. 
+ + Not needed for classification but necessary for segmentation & detection """ keys = [f'{prefix}/{x}' for x in self.loss_names] if loss_items is None: diff --git a/ultralytics/models/yolo/classify/val.py b/ultralytics/models/yolo/classify/val.py index 0748e27f7f..3ebf38086c 100644 --- a/ultralytics/models/yolo/classify/val.py +++ b/ultralytics/models/yolo/classify/val.py @@ -78,6 +78,7 @@ class ClassificationValidator(BaseValidator): return self.metrics.results_dict def build_dataset(self, img_path): + """Creates and returns a ClassificationDataset instance using given image path and preprocessing parameters.""" return ClassificationDataset(root=img_path, args=self.args, augment=False, prefix=self.args.split) def get_dataloader(self, dataset_path, batch_size): diff --git a/ultralytics/models/yolo/detect/train.py b/ultralytics/models/yolo/detect/train.py index 56d9243cfe..d0028c6e68 100644 --- a/ultralytics/models/yolo/detect/train.py +++ b/ultralytics/models/yolo/detect/train.py @@ -57,7 +57,7 @@ class DetectionTrainer(BaseTrainer): return batch def set_model_attributes(self): - """nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps).""" + """Nl = de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps).""" # self.args.box *= 3 / nl # scale to layers # self.args.cls *= self.data["nc"] / 80 * 3 / nl # scale to classes and layers # self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl # scale to image size and layers @@ -80,8 +80,9 @@ class DetectionTrainer(BaseTrainer): def label_loss_items(self, loss_items=None, prefix='train'): """ - Returns a loss dict with labelled training loss items tensor. Not needed for classification but necessary for - segmentation & detection + Returns a loss dict with labelled training loss items tensor. + + Not needed for classification but necessary for segmentation & detection """ keys = [f'{prefix}/{x}' for x in self.loss_names] if loss_items is not None: diff --git a/ultralytics/models/yolo/model.py b/ultralytics/models/yolo/model.py index b85d46bdb8..ef1b41ab80 100644 --- a/ultralytics/models/yolo/model.py +++ b/ultralytics/models/yolo/model.py @@ -6,13 +6,11 @@ from ultralytics.nn.tasks import ClassificationModel, DetectionModel, PoseModel, class YOLO(Model): - """ - YOLO (You Only Look Once) object detection model. 
- """ + """YOLO (You Only Look Once) object detection model.""" @property def task_map(self): - """Map head to model, trainer, validator, and predictor classes""" + """Map head to model, trainer, validator, and predictor classes.""" return { 'classify': { 'model': ClassificationModel, diff --git a/ultralytics/models/yolo/pose/predict.py b/ultralytics/models/yolo/pose/predict.py index 14ae40b118..d00cea022e 100644 --- a/ultralytics/models/yolo/pose/predict.py +++ b/ultralytics/models/yolo/pose/predict.py @@ -21,6 +21,7 @@ class PosePredictor(DetectionPredictor): """ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): + """Initializes PosePredictor, sets task to 'pose' and logs a warning for using 'mps' as device.""" super().__init__(cfg, overrides, _callbacks) self.args.task = 'pose' if isinstance(self.args.device, str) and self.args.device.lower() == 'mps': diff --git a/ultralytics/models/yolo/segment/predict.py b/ultralytics/models/yolo/segment/predict.py index 7d51f7d42a..ba44a48296 100644 --- a/ultralytics/models/yolo/segment/predict.py +++ b/ultralytics/models/yolo/segment/predict.py @@ -21,10 +21,12 @@ class SegmentationPredictor(DetectionPredictor): """ def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): + """Initializes the SegmentationPredictor with the provided configuration, overrides, and callbacks.""" super().__init__(cfg, overrides, _callbacks) self.args.task = 'segment' def postprocess(self, preds, img, orig_imgs): + """Applies non-max suppression and processes detections for each image in an input batch.""" p = ops.non_max_suppression(preds[0], self.args.conf, self.args.iou, diff --git a/ultralytics/models/yolo/segment/val.py b/ultralytics/models/yolo/segment/val.py index 0a2acb41a1..599b0d5394 100644 --- a/ultralytics/models/yolo/segment/val.py +++ b/ultralytics/models/yolo/segment/val.py @@ -144,7 +144,7 @@ class SegmentationValidator(DetectionValidator): def _process_batch(self, detections, labels, pred_masks=None, gt_masks=None, overlap=False, masks=False): """ - Return correct prediction matrix + Return correct prediction matrix. Args: detections (array[N, 6]), x1, y1, x2, y2, conf, class diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py index 61ca6db6b4..ab5cb2fb5a 100644 --- a/ultralytics/nn/autobackend.py +++ b/ultralytics/nn/autobackend.py @@ -20,7 +20,11 @@ from ultralytics.utils.downloads import attempt_download_asset, is_url def check_class_names(names): - """Check class names. Map imagenet class codes to human-readable names if required. Convert lists to dicts.""" + """ + Check class names. + + Map imagenet class codes to human-readable names if required. Convert lists to dicts. + """ if isinstance(names, list): # names is a list names = dict(enumerate(names)) # convert to dict if isinstance(names, dict): @@ -37,36 +41,20 @@ def check_class_names(names): class AutoBackend(nn.Module): + """ + Handles dynamic backend selection for running inference using Ultralytics YOLO models. - @torch.no_grad() - def __init__(self, - weights='yolov8n.pt', - device=torch.device('cpu'), - dnn=False, - data=None, - fp16=False, - fuse=True, - verbose=True): - """ - MultiBackend class for python inference on various platforms using Ultralytics YOLO. + The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide + range of formats, each with specific naming conventions as outlined below: - Args: - weights (str): The path to the weights file. 
Default: 'yolov8n.pt' - device (torch.device): The device to run the model on. - dnn (bool): Use OpenCV DNN module for inference if True, defaults to False. - data (str | Path | optional): Additional data.yaml file for class names. - fp16 (bool): If True, use half precision. Default: False - fuse (bool): Whether to fuse the model or not. Default: True - verbose (bool): Whether to run in verbose mode or not. Default: True - - Supported formats and their naming conventions: - | Format | Suffix | + Supported Formats and Naming Conventions: + | Format | File Suffix | |-----------------------|------------------| | PyTorch | *.pt | | TorchScript | *.torchscript | | ONNX Runtime | *.onnx | - | ONNX OpenCV DNN | *.onnx dnn=True | - | OpenVINO | *.xml | + | ONNX OpenCV DNN | *.onnx (dnn=True)| + | OpenVINO | *openvino_model/ | | CoreML | *.mlpackage | | TensorRT | *.engine | | TensorFlow SavedModel | *_saved_model | @@ -75,6 +63,31 @@ class AutoBackend(nn.Module): | TensorFlow Edge TPU | *_edgetpu.tflite | | PaddlePaddle | *_paddle_model | | ncnn | *_ncnn_model | + + This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy + models across various platforms. + """ + + @torch.no_grad() + def __init__(self, + weights='yolov8n.pt', + device=torch.device('cpu'), + dnn=False, + data=None, + fp16=False, + fuse=True, + verbose=True): + """ + Initialize the AutoBackend for inference. + + Args: + weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'. + device (torch.device): Device to run the model on. Defaults to CPU. + dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False. + data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional. + fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False. + fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True. + verbose (bool): Enable verbose logging. Defaults to True. """ super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) @@ -440,14 +453,14 @@ class AutoBackend(nn.Module): def from_numpy(self, x): """ - Convert a numpy array to a tensor. + Convert a numpy array to a tensor. - Args: - x (np.ndarray): The array to be converted. + Args: + x (np.ndarray): The array to be converted. - Returns: - (torch.Tensor): The converted tensor - """ + Returns: + (torch.Tensor): The converted tensor + """ return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x def warmup(self, imgsz=(1, 3, 640, 640)): @@ -476,7 +489,7 @@ class AutoBackend(nn.Module): @staticmethod def _model_type(p='path/to/model.pt'): """ - This function takes a path to a model file and returns the model type + This function takes a path to a model file and returns the model type. Args: p: path to the model file. Defaults to path/to/model.pt diff --git a/ultralytics/nn/modules/__init__.py b/ultralytics/nn/modules/__init__.py index b6dc6c4423..584a394f72 100644 --- a/ultralytics/nn/modules/__init__.py +++ b/ultralytics/nn/modules/__init__.py @@ -1,16 +1,20 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license """ -Ultralytics modules. Visualize with: +Ultralytics modules. -from ultralytics.nn.modules import * -import torch -import os +Example: + Visualize a module with Netron. 
+ ```python
+ from ultralytics.nn.modules import *
+ import torch
+ import os

-x = torch.ones(1, 128, 40, 40)
-m = Conv(128, 128)
-f = f'{m._get_name()}.onnx'
-torch.onnx.export(m, x, f)
-os.system(f'onnxsim {f} {f} && open {f}')
+ x = torch.ones(1, 128, 40, 40)
+ m = Conv(128, 128)
+ f = f'{m._get_name()}.onnx'
+ torch.onnx.export(m, x, f)
+ os.system(f'onnxsim {f} {f} && open {f}')
+ ```
"""

from .block import (C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, GhostBottleneck,
diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py
index d8183d84de..593ae24cd6 100644
--- a/ultralytics/nn/modules/block.py
+++ b/ultralytics/nn/modules/block.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Block modules
-"""
+"""Block modules."""

import torch
import torch.nn as nn
@@ -17,6 +15,7 @@ __all__ = ('DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3', 'C2f', '

class DFL(nn.Module):
"""
Integral module of Distribution Focal Loss (DFL).
+
Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
"""

@@ -51,11 +50,14 @@ class Proto(nn.Module):

class HGStem(nn.Module):
- """StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
+ """
+ StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""

def __init__(self, c1, cm, c2):
+ """Initialize the HGStem block of PPHGNetV2 with input, middle and output channels."""
super().__init__()
self.stem1 = Conv(c1, cm, 3, 2, act=nn.ReLU())
self.stem2a = Conv(cm, cm // 2, 2, 1, 0, act=nn.ReLU())
@@ -79,11 +81,14 @@ class HGStem(nn.Module):

class HGBlock(nn.Module):
- """HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
+ """
+ HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""

def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
+ """Initializes the HGBlock with specified input/output channels, kernel size, block count and LightConv option."""
super().__init__()
block = LightConv if lightconv else Conv
self.m = nn.ModuleList(block(c1 if i == 0 else cm, cm, k=k, act=act) for i in range(n))
@@ -218,6 +223,7 @@ class RepC3(nn.Module):
"""Rep C3."""

def __init__(self, c1, c2, n=3, e=1.0):
+ """Initialize the RepC3 module with input/output channels and the number of RepConv blocks."""
super().__init__()
c_ = int(c2 * e)  # hidden channels
self.cv1 = Conv(c1, c2, 1, 1)
diff --git a/ultralytics/nn/modules/conv.py b/ultralytics/nn/modules/conv.py
index 77e99c009e..21a27009dd 100644
--- a/ultralytics/nn/modules/conv.py
+++ b/ultralytics/nn/modules/conv.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Convolution modules
-"""
+"""Convolution modules."""

import math

@@ -69,7 +67,9 @@ class Conv2(Conv):

class LightConv(nn.Module):
- """Light convolution with args(ch_in, ch_out, kernel).
+ """
+ Light convolution with args(ch_in, ch_out, kernel).
+
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
"""

@@ -148,12 +148,15 @@ class GhostConv(nn.Module):

class RepConv(nn.Module):
"""
- RepConv is a basic rep-style block, including training and deploy status. This module is used in RT-DETR.
+ RepConv is a basic rep-style block, including training and deploy status.
+
+ This module is used in RT-DETR.
Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
"""
default_act = nn.SiLU()  # default activation

def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
+ """Initializes the RepConv block with input/output channels, kernel size, stride and optional activation."""
super().__init__()
assert k == 3 and p == 1
self.g = g
@@ -166,27 +169,30 @@
self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)

def forward_fuse(self, x):
- """Forward process"""
+ """Forward process."""
return self.act(self.conv(x))

def forward(self, x):
- """Forward process"""
+ """Forward process."""
id_out = 0 if self.bn is None else self.bn(x)
return self.act(self.conv1(x) + self.conv2(x) + id_out)

def get_equivalent_kernel_bias(self):
+ """Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
kernelid, biasid = self._fuse_bn_tensor(self.bn)
return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid

def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+ """Pads a 1x1 tensor to a 3x3 tensor."""
if kernel1x1 is None:
return 0
else:
return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])

def _fuse_bn_tensor(self, branch):
+ """Generates appropriate kernels and biases for convolution by fusing branches of the neural network."""
if branch is None:
return 0, 0
if isinstance(branch, Conv):
@@ -214,6 +220,7 @@
return kernel * t, beta - running_mean * gamma / std

def fuse_convs(self):
+ """Combines two convolution layers into a single layer and removes unused attributes from the class."""
if hasattr(self, 'conv'):
return
kernel, bias = self.get_equivalent_kernel_bias()
@@ -243,12 +250,14 @@ class ChannelAttention(nn.Module):
"""Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""

def __init__(self, channels: int) -> None:
+ """Initializes the class and sets the basic configurations and instance variables required."""
super().__init__()
self.pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
self.act = nn.Sigmoid()

def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Applies channel attention, scaling the input by sigmoid-activated weights from pooled features."""
return x * self.act(self.fc(self.pool(x)))
diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py
index 0b02eb3c64..9e993d7994 100644
--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@@ -1,7 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Model head modules
-"""
+"""Model head modules."""

import math

@@ -229,6 +227,7 @@ class RTDETRDecoder(nn.Module):
self._reset_parameters()

def forward(self, x, batch=None):
+ """Runs the forward pass of the module, returning bounding box and classification scores for the input."""
from ultralytics.models.utils.ops import get_cdn_group

# input projection and embedding
@@ -265,6 +264,7 @@
return y if self.export else (y, x)

def _generate_anchors(self, shapes, grid_size=0.05, dtype=torch.float32, device='cpu', eps=1e-2):
+ """Generates anchor bounding boxes for given shapes with specific grid size and validates them."""
anchors = []
for i, (h, w) in enumerate(shapes):
sy = torch.arange(end=h, dtype=dtype, device=device)
@@ -284,6 +284,7 @@ class RTDETRDecoder(nn.Module):
return anchors, valid_mask def _get_encoder_input(self, x): + """Processes and returns encoder inputs by getting projection features from input and concatenating them.""" # get projection features x = [self.input_proj[i](feat) for i, feat in enumerate(x)] # get encoder inputs @@ -301,6 +302,7 @@ class RTDETRDecoder(nn.Module): return feats, shapes def _get_decoder_input(self, feats, shapes, dn_embed=None, dn_bbox=None): + """Generates and prepares the input required for the decoder from the provided features and shapes.""" bs = len(feats) # prepare input for decoder anchors, valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device) @@ -339,6 +341,7 @@ class RTDETRDecoder(nn.Module): # TODO def _reset_parameters(self): + """Initializes or resets the parameters of the model's various components with predefined weights and biases.""" # class and bbox head init bias_cls = bias_init_with_prob(0.01) / 80 * self.nc # NOTE: the weight initialization in `linear_init_` would cause NaN when training with custom datasets. diff --git a/ultralytics/nn/modules/transformer.py b/ultralytics/nn/modules/transformer.py index 9a51d2cb71..4b7c0868ff 100644 --- a/ultralytics/nn/modules/transformer.py +++ b/ultralytics/nn/modules/transformer.py @@ -1,7 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -""" -Transformer modules -""" +"""Transformer modules.""" import math @@ -18,9 +16,10 @@ __all__ = ('TransformerEncoderLayer', 'TransformerLayer', 'TransformerBlock', 'M class TransformerEncoderLayer(nn.Module): - """Transformer Encoder.""" + """Defines a single layer of the transformer encoder.""" def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False): + """Initialize the TransformerEncoderLayer with specified parameters.""" super().__init__() from ...utils.torch_utils import TORCH_1_9 if not TORCH_1_9: @@ -41,10 +40,11 @@ class TransformerEncoderLayer(nn.Module): self.normalize_before = normalize_before def with_pos_embed(self, tensor, pos=None): - """Add position embeddings if given.""" + """Add position embeddings to the tensor if provided.""" return tensor if pos is None else tensor + pos def forward_post(self, src, src_mask=None, src_key_padding_mask=None, pos=None): + """Performs forward pass with post-normalization.""" q = k = self.with_pos_embed(src, pos) src2 = self.ma(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0] src = src + self.dropout1(src2) @@ -54,6 +54,7 @@ class TransformerEncoderLayer(nn.Module): return self.norm2(src) def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, pos=None): + """Performs forward pass with pre-normalization.""" src2 = self.norm1(src) q = k = self.with_pos_embed(src2, pos) src2 = self.ma(q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0] @@ -70,11 +71,14 @@ class TransformerEncoderLayer(nn.Module): class AIFI(TransformerEncoderLayer): + """Defines the AIFI transformer layer.""" def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(), normalize_before=False): + """Initialize the AIFI instance with specified parameters.""" super().__init__(c1, cm, num_heads, dropout, act, normalize_before) def forward(self, x): + """Forward pass for the AIFI transformer layer.""" c, h, w = x.shape[1:] pos_embed = self.build_2d_sincos_position_embedding(w, h, c) # flatten [B, C, H, W] to [B, HxW, C] @@ -82,7 +86,8 @@ class AIFI(TransformerEncoderLayer): return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous() @staticmethod - def 
build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.): + def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0): + """Builds 2D sine-cosine position embedding.""" grid_w = torch.arange(int(w), dtype=torch.float32) grid_h = torch.arange(int(h), dtype=torch.float32) grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing='ij') @@ -140,27 +145,32 @@ class TransformerBlock(nn.Module): class MLPBlock(nn.Module): + """Implements a single block of a multi-layer perceptron.""" def __init__(self, embedding_dim, mlp_dim, act=nn.GELU): + """Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function.""" super().__init__() self.lin1 = nn.Linear(embedding_dim, mlp_dim) self.lin2 = nn.Linear(mlp_dim, embedding_dim) self.act = act() def forward(self, x: torch.Tensor) -> torch.Tensor: + """Forward pass for the MLPBlock.""" return self.lin2(self.act(self.lin1(x))) class MLP(nn.Module): - """ Very simple multi-layer perceptron (also called FFN)""" + """Implements a simple multi-layer perceptron (also called FFN).""" def __init__(self, input_dim, hidden_dim, output_dim, num_layers): + """Initialize the MLP with specified input, hidden, output dimensions and number of layers.""" super().__init__() self.num_layers = num_layers h = [hidden_dim] * (num_layers - 1) self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])) def forward(self, x): + """Forward pass for the entire MLP.""" for i, layer in enumerate(self.layers): x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x) return x @@ -168,17 +178,22 @@ class MLP(nn.Module): class LayerNorm2d(nn.Module): """ - LayerNorm2d module from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py + 2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations. + + Original implementation at + https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 """ def __init__(self, num_channels, eps=1e-6): + """Initialize LayerNorm2d with the given parameters.""" super().__init__() self.weight = nn.Parameter(torch.ones(num_channels)) self.bias = nn.Parameter(torch.zeros(num_channels)) self.eps = eps def forward(self, x): + """Perform forward pass for 2D layer normalization.""" u = x.mean(1, keepdim=True) s = (x - u).pow(2).mean(1, keepdim=True) x = (x - u) / torch.sqrt(s + self.eps) @@ -187,11 +202,13 @@ class LayerNorm2d(nn.Module): class MSDeformAttn(nn.Module): """ - Original Multi-Scale Deformable Attention Module. + Multi-Scale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations. + https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/modules/ms_deform_attn.py """ def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): + """Initialize MSDeformAttn with the given parameters.""" super().__init__() if d_model % n_heads != 0: raise ValueError(f'd_model must be divisible by n_heads, but got {d_model} and {n_heads}') @@ -214,6 +231,7 @@ class MSDeformAttn(nn.Module): self._reset_parameters() def _reset_parameters(self): + """Reset module parameters.""" constant_(self.sampling_offsets.weight.data, 0.) 
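+ # The init below follows the reference Deformable-DETR scheme: each attention head starts with
+ # sampling offsets pointing along its own angle (2*pi*i/n_heads) on the unit circle.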
thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) @@ -232,7 +250,10 @@ class MSDeformAttn(nn.Module): def forward(self, query, refer_bbox, value, value_shapes, value_mask=None): """ + Perform forward pass for multi-scale deformable attention. + https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py + Args: query (torch.Tensor): [bs, query_length, C] refer_bbox (torch.Tensor): [bs, query_length, n_levels, 2], range in [0, 1], top-left (0,0), @@ -272,24 +293,27 @@ class MSDeformAttn(nn.Module): class DeformableTransformerDecoderLayer(nn.Module): """ + Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations. + https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/deformable_transformer.py """ def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0., act=nn.ReLU(), n_levels=4, n_points=4): + """Initialize the DeformableTransformerDecoderLayer with the given parameters.""" super().__init__() - # self attention + # Self attention self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout) self.dropout1 = nn.Dropout(dropout) self.norm1 = nn.LayerNorm(d_model) - # cross attention + # Cross attention self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points) self.dropout2 = nn.Dropout(dropout) self.norm2 = nn.LayerNorm(d_model) - # ffn + # FFN self.linear1 = nn.Linear(d_model, d_ffn) self.act = act self.dropout3 = nn.Dropout(dropout) @@ -299,37 +323,44 @@ class DeformableTransformerDecoderLayer(nn.Module): @staticmethod def with_pos_embed(tensor, pos): + """Add positional embeddings to the input tensor, if provided.""" return tensor if pos is None else tensor + pos def forward_ffn(self, tgt): + """Perform forward pass through the Feed-Forward Network part of the layer.""" tgt2 = self.linear2(self.dropout3(self.act(self.linear1(tgt)))) tgt = tgt + self.dropout4(tgt2) return self.norm3(tgt) def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None): - # self attention + """Perform the forward pass through the entire decoder layer.""" + + # Self attention q = k = self.with_pos_embed(embed, query_pos) tgt = self.self_attn(q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1), attn_mask=attn_mask)[0].transpose(0, 1) embed = embed + self.dropout1(tgt) embed = self.norm1(embed) - # cross attention + # Cross attention tgt = self.cross_attn(self.with_pos_embed(embed, query_pos), refer_bbox.unsqueeze(2), feats, shapes, padding_mask) embed = embed + self.dropout2(tgt) embed = self.norm2(embed) - # ffn + # FFN return self.forward_ffn(embed) class DeformableTransformerDecoder(nn.Module): """ + Implementation of Deformable Transformer Decoder based on PaddleDetection. 
+ https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py """ def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1): + """Initialize the DeformableTransformerDecoder with the given parameters.""" super().__init__() self.layers = _get_clones(decoder_layer, num_layers) self.num_layers = num_layers @@ -347,6 +378,7 @@ class DeformableTransformerDecoder(nn.Module): pos_mlp, attn_mask=None, padding_mask=None): + """Perform the forward pass through the entire decoder.""" output = embed dec_bboxes = [] dec_cls = [] diff --git a/ultralytics/nn/modules/utils.py b/ultralytics/nn/modules/utils.py index f8636dc479..c7bec7aff6 100644 --- a/ultralytics/nn/modules/utils.py +++ b/ultralytics/nn/modules/utils.py @@ -1,7 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -""" -Module utils -""" +"""Module utils.""" import copy import math @@ -16,15 +14,17 @@ __all__ = 'multi_scale_deformable_attn_pytorch', 'inverse_sigmoid' def _get_clones(module, n): + """Create a list of cloned modules from the given module.""" return nn.ModuleList([copy.deepcopy(module) for _ in range(n)]) def bias_init_with_prob(prior_prob=0.01): - """initialize conv/fc bias value according to a given probability value.""" + """Initialize conv/fc bias value according to a given probability value.""" return float(-np.log((1 - prior_prob) / prior_prob)) # return bias_init def linear_init_(module): + """Initialize the weights and biases of a linear module.""" bound = 1 / math.sqrt(module.weight.shape[0]) uniform_(module.weight, -bound, bound) if hasattr(module, 'bias') and module.bias is not None: @@ -32,6 +32,7 @@ def linear_init_(module): def inverse_sigmoid(x, eps=1e-5): + """Calculate the inverse sigmoid function for a tensor.""" x = x.clamp(min=0, max=1) x1 = x.clamp(min=eps) x2 = (1 - x).clamp(min=eps) @@ -43,6 +44,7 @@ def multi_scale_deformable_attn_pytorch(value: torch.Tensor, value_spatial_shape attention_weights: torch.Tensor) -> torch.Tensor: """ Multi-scale deformable attention. + https://github.com/IDEA-Research/detrex/blob/main/detrex/layers/multi_scale_deform_attn.py """ diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py index 24153d244c..6145146ba0 100644 --- a/ultralytics/nn/tasks.py +++ b/ultralytics/nn/tasks.py @@ -25,14 +25,11 @@ except ImportError: class BaseModel(nn.Module): - """ - The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family. - """ + """The BaseModel class serves as a base class for all the models in the Ultralytics YOLO family.""" def forward(self, x, *args, **kwargs): """ - Forward pass of the model on a single scale. - Wrapper for `_forward_once` method. + Forward pass of the model on a single scale. Wrapper for `_forward_once` method. Args: x (torch.Tensor | dict): The input image tensor or a dict including image tensor and gt labels. @@ -93,8 +90,8 @@ class BaseModel(nn.Module): def _profile_one_layer(self, m, x, dt): """ - Profile the computation time and FLOPs of a single layer of the model on a given input. - Appends the results to the provided list. + Profile the computation time and FLOPs of a single layer of the model on a given input. Appends the results to + the provided list. Args: m (nn.Module): The layer to be profiled. @@ -158,7 +155,7 @@ class BaseModel(nn.Module): def info(self, detailed=False, verbose=True, imgsz=640): """ - Prints model information + Prints model information. Args: detailed (bool): if True, prints out detailed information about the model. 
Defaults to False
@@ -175,7 +172,7 @@
fn (function): the function to apply to the model

Returns:
- A model that is a Detect() object.
+ (BaseModel): An updated BaseModel object.
"""
self = super()._apply(fn)
m = self.model[-1]  # Detect()
@@ -202,7 +199,7 @@
def loss(self, batch, preds=None):
"""
- Compute loss
+ Compute loss.

Args:
batch (dict): Batch to compute loss on
@@ -215,6 +212,7 @@
return self.criterion(preds, batch)

def init_criterion(self):
+ """Initialize the loss criterion for the BaseModel."""
raise NotImplementedError('compute_loss() needs to be implemented by task heads')

@@ -222,6 +220,7 @@ class DetectionModel(BaseModel):
"""YOLOv8 detection model."""

def __init__(self, cfg='yolov8n.yaml', ch=3, nc=None, verbose=True):  # model, input channels, number of classes
+ """Initialize the YOLOv8 detection model with the given config and parameters."""
super().__init__()
self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg)  # cfg dict
@@ -289,6 +288,7 @@
return y

def init_criterion(self):
+ """Initialize the loss criterion for the DetectionModel."""
return v8DetectionLoss(self)

@@ -300,6 +300,7 @@ class SegmentationModel(DetectionModel):
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

def init_criterion(self):
+ """Initialize the loss criterion for the SegmentationModel."""
return v8SegmentationLoss(self)

@@ -316,6 +317,7 @@ class PoseModel(DetectionModel):
super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)

def init_criterion(self):
+ """Initialize the loss criterion for the PoseModel."""
return v8PoseLoss(self)

@@ -365,22 +367,59 @@ class ClassificationModel(BaseModel):
m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None)

def init_criterion(self):
- """Compute the classification loss between predictions and true labels."""
+ """Initialize the loss criterion for the ClassificationModel."""
return v8ClassificationLoss()


class RTDETRDetectionModel(DetectionModel):
+ """
+ RTDETR (Real-Time DEtection TRansformer) Detection Model class.
+
+ This class is responsible for constructing the RTDETR architecture, defining loss functions, and
+ facilitating both the training and inference processes. RTDETR is an object detection and tracking model
+ that extends from the DetectionModel base class.
+
+ Attributes:
+ cfg (str): The configuration file path or preset string. Default is 'rtdetr-l.yaml'.
+ ch (int): Number of input channels. Default is 3 (RGB).
+ nc (int, optional): Number of classes for object detection. Default is None.
+ verbose (bool): Specifies if summary statistics are shown during initialization. Default is True.
+
+ Methods:
+ init_criterion: Initializes the criterion used for loss calculation.
+ loss: Computes and returns the loss during training.
+ predict: Performs a forward pass through the network and returns the output.
+ """

def __init__(self, cfg='rtdetr-l.yaml', ch=3, nc=None, verbose=True):
+ """
+ Initialize the RTDETRDetectionModel.
+
+ Args:
+ cfg (str): Configuration file name or path.
+ ch (int): Number of input channels.
+ nc (int, optional): Number of classes. Defaults to None.
+ verbose (bool, optional): Print additional information during initialization. Defaults to True.
+ """ super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose) def init_criterion(self): - """Compute the classification loss between predictions and true labels.""" + """Initialize the loss criterion for the RTDETRDetectionModel.""" from ultralytics.models.utils.loss import RTDETRDetectionLoss return RTDETRDetectionLoss(nc=self.nc, use_vfl=True) def loss(self, batch, preds=None): + """ + Compute the loss for the given batch of data. + + Args: + batch (dict): Dictionary containing image and label data. + preds (torch.Tensor, optional): Precomputed model predictions. Defaults to None. + + Returns: + tuple: A tuple containing the total loss and main three losses in a tensor. + """ if not hasattr(self, 'criterion'): self.criterion = self.init_criterion() @@ -417,16 +456,17 @@ class RTDETRDetectionModel(DetectionModel): def predict(self, x, profile=False, visualize=False, batch=None, augment=False): """ - Perform a forward pass through the network. + Perform a forward pass through the model. Args: - x (torch.Tensor): The input tensor to the model - profile (bool): Print the computation time of each layer if True, defaults to False. - visualize (bool): Save the feature maps of the model if True, defaults to False - batch (dict): A dict including gt boxes and labels from dataloader. + x (torch.Tensor): The input tensor. + profile (bool, optional): If True, profile the computation time for each layer. Defaults to False. + visualize (bool, optional): If True, save feature maps for visualization. Defaults to False. + batch (dict, optional): Ground truth data for evaluation. Defaults to None. + augment (bool, optional): If True, perform data augmentation during inference. Defaults to False. Returns: - (torch.Tensor): The last output of the model. + torch.Tensor: Model's output tensor. """ y, dt = [], [] # outputs for m in self.model[:-1]: # except the head part @@ -708,9 +748,9 @@ def yaml_model_load(path): def guess_model_scale(model_path): """ - Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale. - The function uses regular expression matching to find the pattern of the model scale in the YAML file name, - which is denoted by n, s, m, l, or x. The function returns the size character of the model scale as a string. + Takes a path to a YOLO model's YAML file as input and extracts the size character of the model's scale. The function + uses regular expression matching to find the pattern of the model scale in the YAML file name, which is denoted by + n, s, m, l, or x. The function returns the size character of the model scale as a string. Args: model_path (str | Path): The path to the YOLO model's YAML file. diff --git a/ultralytics/trackers/bot_sort.py b/ultralytics/trackers/bot_sort.py index d42d46ebdd..543d75262f 100644 --- a/ultralytics/trackers/bot_sort.py +++ b/ultralytics/trackers/bot_sort.py @@ -12,6 +12,33 @@ from .utils.kalman_filter import KalmanFilterXYWH class BOTrack(STrack): + """ + An extended version of the STrack class for YOLOv8, adding object tracking features. + + Attributes: + shared_kalman (KalmanFilterXYWH): A shared Kalman filter for all instances of BOTrack. + smooth_feat (np.ndarray): Smoothed feature vector. + curr_feat (np.ndarray): Current feature vector. + features (deque): A deque to store feature vectors with a maximum length defined by `feat_history`. + alpha (float): Smoothing factor for the exponential moving average of features. + mean (np.ndarray): The mean state of the Kalman filter. 
+ covariance (np.ndarray): The covariance matrix of the Kalman filter. + + Methods: + update_features(feat): Update features vector and smooth it using exponential moving average. + predict(): Predicts the mean and covariance using Kalman filter. + re_activate(new_track, frame_id, new_id): Reactivates a track with updated features and optionally new ID. + update(new_track, frame_id): Update the YOLOv8 instance with new track and frame ID. + tlwh: Property that gets the current position in tlwh format `(top left x, top left y, width, height)`. + multi_predict(stracks): Predicts the mean and covariance of multiple object tracks using shared Kalman filter. + convert_coords(tlwh): Converts tlwh bounding box coordinates to xywh format. + tlwh_to_xywh(tlwh): Convert bounding box to xywh format `(center x, center y, width, height)`. + + Usage: + bo_track = BOTrack(tlwh, score, cls, feat) + bo_track.predict() + bo_track.update(new_track, frame_id) + """ shared_kalman = KalmanFilterXYWH() def __init__(self, tlwh, score, cls, feat=None, feat_history=50): @@ -59,9 +86,7 @@ class BOTrack(STrack): @property def tlwh(self): - """Get current position in bounding box format `(top left x, top left y, - width, height)`. - """ + """Get current position in bounding box format `(top left x, top left y, width, height)`.""" if self.mean is None: return self._tlwh.copy() ret = self.mean[:4].copy() @@ -90,15 +115,37 @@ class BOTrack(STrack): @staticmethod def tlwh_to_xywh(tlwh): - """Convert bounding box to format `(center x, center y, width, - height)`. - """ + """Convert bounding box to format `(center x, center y, width, height)`.""" ret = np.asarray(tlwh).copy() ret[:2] += ret[2:] / 2 return ret class BOTSORT(BYTETracker): + """ + An extended version of the BYTETracker class for YOLOv8, designed for object tracking with ReID and GMC algorithm. + + Attributes: + proximity_thresh (float): Threshold for spatial proximity (IoU) between tracks and detections. + appearance_thresh (float): Threshold for appearance similarity (ReID embeddings) between tracks and detections. + encoder (object): Object to handle ReID embeddings, set to None if ReID is not enabled. + gmc (GMC): An instance of the GMC algorithm for data association. + args (object): Parsed command-line arguments containing tracking parameters. + + Methods: + get_kalmanfilter(): Returns an instance of KalmanFilterXYWH for object tracking. + init_track(dets, scores, cls, img): Initialize track with detections, scores, and classes. + get_dists(tracks, detections): Get distances between tracks and detections using IoU and (optionally) ReID. + multi_predict(tracks): Predict and track multiple objects with YOLOv8 model. + + Usage: + bot_sort = BOTSORT(args, frame_rate) + bot_sort.init_track(dets, scores, cls, img) + bot_sort.multi_predict(tracks) + + Note: + The class is designed to work with the YOLOv8 object detection model and supports ReID only if enabled via args. + """ def __init__(self, args, frame_rate=30): """Initialize YOLOv8 object with ReID module and GMC algorithm.""" diff --git a/ultralytics/trackers/byte_tracker.py b/ultralytics/trackers/byte_tracker.py index 04958cda6f..40954c98b4 100644 --- a/ultralytics/trackers/byte_tracker.py +++ b/ultralytics/trackers/byte_tracker.py @@ -8,10 +8,43 @@ from .utils.kalman_filter import KalmanFilterXYAH class STrack(BaseTrack): + """ + Single object tracking representation that uses Kalman filtering for state estimation. 
+ + This class is responsible for storing all the information regarding individual tracklets and performs state updates + and predictions based on Kalman filter. + + Attributes: + shared_kalman (KalmanFilterXYAH): Shared Kalman filter that is used across all STrack instances for prediction. + _tlwh (np.ndarray): Private attribute to store top-left corner coordinates and width and height of bounding box. + kalman_filter (KalmanFilterXYAH): Instance of Kalman filter used for this particular object track. + mean (np.ndarray): Mean state estimate vector. + covariance (np.ndarray): Covariance of state estimate. + is_activated (bool): Boolean flag indicating if the track has been activated. + score (float): Confidence score of the track. + tracklet_len (int): Length of the tracklet. + cls (any): Class label for the object. + idx (int): Index or identifier for the object. + frame_id (int): Current frame ID. + start_frame (int): Frame where the object was first detected. + + Methods: + predict(): Predict the next state of the object using Kalman filter. + multi_predict(stracks): Predict the next states for multiple tracks. + multi_gmc(stracks, H): Update multiple track states using a homography matrix. + activate(kalman_filter, frame_id): Activate a new tracklet. + re_activate(new_track, frame_id, new_id): Reactivate a previously lost tracklet. + update(new_track, frame_id): Update the state of a matched track. + convert_coords(tlwh): Convert bounding box to x-y-angle-height format. + tlwh_to_xyah(tlwh): Convert tlwh bounding box to xyah format. + tlbr_to_tlwh(tlbr): Convert tlbr bounding box to tlwh format. + tlwh_to_tlbr(tlwh): Convert tlwh bounding box to tlbr format. + """ + shared_kalman = KalmanFilterXYAH() def __init__(self, tlwh, score, cls): - """wait activate.""" + """Initialize new STrack instance.""" self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32) self.kalman_filter = None self.mean, self.covariance = None, None @@ -92,10 +125,11 @@ class STrack(BaseTrack): def update(self, new_track, frame_id): """ - Update a matched track - :type new_track: STrack - :type frame_id: int - :return: + Update the state of a matched track. + + Args: + new_track (STrack): The new track containing updated information. + frame_id (int): The ID of the current frame. """ self.frame_id = frame_id self.tracklet_len += 1 @@ -116,9 +150,7 @@ class STrack(BaseTrack): @property def tlwh(self): - """Get current position in bounding box format `(top left x, top left y, - width, height)`. - """ + """Get current position in bounding box format (top left x, top left y, width, height).""" if self.mean is None: return self._tlwh.copy() ret = self.mean[:4].copy() @@ -128,17 +160,15 @@ class STrack(BaseTrack): @property def tlbr(self): - """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., - `(top left, bottom right)`. - """ + """Convert bounding box to format (min x, min y, max x, max y), i.e., (top left, bottom right).""" ret = self.tlwh.copy() ret[2:] += ret[:2] return ret @staticmethod def tlwh_to_xyah(tlwh): - """Convert bounding box to format `(center x, center y, aspect ratio, - height)`, where the aspect ratio is `width / height`. + """Convert bounding box to format (center x, center y, aspect ratio, height), where the aspect ratio is width / + height. """ ret = np.asarray(tlwh).copy() ret[:2] += ret[2:] / 2 @@ -165,6 +195,33 @@ class STrack(BaseTrack): class BYTETracker: + """ + BYTETracker: A tracking algorithm built on top of YOLOv8 for object detection and tracking. 
+
+ The class is responsible for initializing, updating, and managing the tracks for detected objects in a video
+ sequence. It maintains the state of tracked, lost, and removed tracks over frames, utilizes Kalman filtering for
+ predicting the new object locations, and performs data association.
+
+ Attributes:
+ tracked_stracks (list[STrack]): List of successfully activated tracks.
+ lost_stracks (list[STrack]): List of lost tracks.
+ removed_stracks (list[STrack]): List of removed tracks.
+ frame_id (int): The current frame ID.
+ args (namespace): Command-line arguments.
+ max_time_lost (int): The maximum frames for a track to be considered as 'lost'.
+ kalman_filter (object): Kalman Filter object.
+
+ Methods:
+ update(results, img=None): Updates object tracker with new detections.
+ get_kalmanfilter(): Returns a Kalman filter object for tracking bounding boxes.
+ init_track(dets, scores, cls, img=None): Initialize object tracking with detections.
+ get_dists(tracks, detections): Calculates the distance between tracks and detections.
+ multi_predict(tracks): Predicts the location of tracks.
+ reset_id(): Resets the ID counter of STrack.
+ joint_stracks(tlista, tlistb): Combines two lists of stracks.
+ sub_stracks(tlista, tlistb): Filters out the stracks present in the second list from the first list.
+ remove_duplicate_stracks(stracksa, stracksb): Removes duplicate stracks based on IOU.
+ """

def __init__(self, args, frame_rate=30):
"""Initialize a YOLOv8 object to track objects with given arguments and frame rate."""
@@ -234,8 +291,7 @@
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
- # Step 3: Second association, with low score detection boxes
- # association the untrack to the low score detections
+ # Step 3: Second association, with low score detection boxes, associating untracked tracks to the low score detections
detections_second = self.init_track(dets_second, scores_second, cls_second, img)
r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
# TODO
diff --git a/ultralytics/trackers/track.py b/ultralytics/trackers/track.py
index cfb4b08adc..cf06c033cf 100644
--- a/ultralytics/trackers/track.py
+++ b/ultralytics/trackers/track.py
@@ -60,7 +60,6 @@ def register_tracker(model, persist):
Args:
model (object): The model object to register tracking callbacks for.
persist (bool): Whether to persist the trackers if they already exist.
-
"""
model.add_callback('on_predict_start', partial(on_predict_start, persist=persist))
model.add_callback('on_predict_postprocess_end', on_predict_postprocess_end)
diff --git a/ultralytics/trackers/utils/gmc.py b/ultralytics/trackers/utils/gmc.py
index 4d91df45f2..24e4907d8e 100644
--- a/ultralytics/trackers/utils/gmc.py
+++ b/ultralytics/trackers/utils/gmc.py
@@ -9,6 +9,29 @@ from ultralytics.utils import LOGGER


class GMC:
+ """
+ Generalized Motion Compensation (GMC) class for tracking and object detection in video frames.
+
+ This class provides methods for tracking and detecting objects based on several tracking algorithms including ORB,
+ SIFT, ECC, and Sparse Optical Flow. It also supports downscaling of frames for computational efficiency.
+
+ Attributes:
+ method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
+ downscale (int): Factor by which to downscale the frames for processing.
+ prevFrame (np.array): Stores the previous frame for tracking.
+ prevKeyPoints (list): Stores the keypoints from the previous frame. + prevDescriptors (np.array): Stores the descriptors from the previous frame. + initializedFirstFrame (bool): Flag to indicate if the first frame has been processed. + + Methods: + __init__(self, method='sparseOptFlow', downscale=2): Initializes a GMC object with the specified method + and downscale factor. + apply(self, raw_frame, detections=None): Applies the chosen method to a raw frame and optionally uses + provided detections. + applyEcc(self, raw_frame, detections=None): Applies the ECC algorithm to a raw frame. + applyFeatures(self, raw_frame, detections=None): Applies feature-based methods like ORB or SIFT to a raw frame. + applySparseOptFlow(self, raw_frame, detections=None): Applies the Sparse Optical Flow method to a raw frame. + """ def __init__(self, method='sparseOptFlow', downscale=2): """Initialize a video tracker with specified parameters.""" diff --git a/ultralytics/trackers/utils/kalman_filter.py b/ultralytics/trackers/utils/kalman_filter.py index 9527ede792..d74082745a 100644 --- a/ultralytics/trackers/utils/kalman_filter.py +++ b/ultralytics/trackers/utils/kalman_filter.py @@ -8,8 +8,8 @@ class KalmanFilterXYAH: """ For bytetrack. A simple Kalman filter for tracking bounding boxes in image space. - The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y), - aspect ratio a, height h, and their respective velocities. + The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y), aspect + ratio a, height h, and their respective velocities. Object motion follows a constant velocity model. The bounding box location (x, y, a, h) is taken as direct observation of the state space (linear observation model). @@ -182,8 +182,8 @@ class KalmanFilterXYAH: def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'): """ Compute gating distance between state distribution and measurements. A suitable distance threshold can be - obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of - freedom, otherwise 2. + obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of freedom, + otherwise 2. Parameters ---------- @@ -223,8 +223,8 @@ class KalmanFilterXYWH(KalmanFilterXYAH): """ For BoT-SORT. A simple Kalman filter for tracking bounding boxes in image space. - The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y), - width w, height h, and their respective velocities. + The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y), width + w, height h, and their respective velocities. Object motion follows a constant velocity model. The bounding box location (x, y, w, h) is taken as direct observation of the state space (linear observation model). 
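The constant-velocity model these Kalman filter docstrings describe reduces to a simple linear predict step. A minimal standalone sketch (NumPy only; the variable names are illustrative, and the real filters in `ultralytics/trackers/utils/kalman_filter.py` also add per-step motion noise):

```python
import numpy as np

ndim, dt = 4, 1.0
F = np.eye(2 * ndim)  # 8x8 constant-velocity transition matrix
for i in range(ndim):
    F[i, ndim + i] = dt  # each position component gains velocity * dt

# State: (x, y, w, h, vx, vy, vw, vh) as in the xywh filter
mean = np.array([50.0, 40.0, 20.0, 10.0, 2.0, 0.0, 0.0, 0.0])
covariance = np.eye(2 * ndim)

mean_pred = F @ mean                    # x advances by vx * dt -> 52.0
covariance_pred = F @ covariance @ F.T  # the real filter adds motion noise Q here
print(mean_pred[:4])                    # [52. 40. 20. 10.]
```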
diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py index c449013643..d8e57431a9 100644 --- a/ultralytics/utils/__init__.py +++ b/ultralytics/utils/__init__.py @@ -117,6 +117,7 @@ class TQDM(tqdm_original): """ def __init__(self, *args, **kwargs): + """Initialize custom Ultralytics tqdm class with different default arguments.""" # Set new default values (these can still be overridden when calling TQDM) kwargs['disable'] = not VERBOSE or kwargs.get('disable', False) # logical 'and' with default value if passed kwargs.setdefault('bar_format', TQDM_BAR_FORMAT) # override default value if passed @@ -124,8 +125,7 @@ class TQDM(tqdm_original): class SimpleClass: - """ - Ultralytics SimpleClass is a base class providing helpful string representation, error reporting, and attribute + """Ultralytics SimpleClass is a base class providing helpful string representation, error reporting, and attribute access methods for easier debugging and usage. """ @@ -154,8 +154,7 @@ class SimpleClass: class IterableSimpleNamespace(SimpleNamespace): - """ - Ultralytics IterableSimpleNamespace is an extension class of SimpleNamespace that adds iterable functionality and + """Ultralytics IterableSimpleNamespace is an extension class of SimpleNamespace that adds iterable functionality and enables usage with dict() and for loops. """ @@ -256,8 +255,8 @@ class EmojiFilter(logging.Filter): """ A custom logging filter class for removing emojis in log messages. - This filter is particularly useful for ensuring compatibility with Windows terminals - that may not support the display of emojis in log messages. + This filter is particularly useful for ensuring compatibility with Windows terminals that may not support the + display of emojis in log messages. """ def filter(self, record): @@ -275,9 +274,9 @@ if WINDOWS: # emoji-safe logging class ThreadingLocked: """ - A decorator class for ensuring thread-safe execution of a function or method. - This class can be used as a decorator to make sure that if the decorated function - is called from multiple threads, only one thread at a time will be able to execute the function. + A decorator class for ensuring thread-safe execution of a function or method. This class can be used as a decorator + to make sure that if the decorated function is called from multiple threads, only one thread at a time will be able + to execute the function. Attributes: lock (threading.Lock): A lock object used to manage access to the decorated function. @@ -294,13 +293,16 @@ class ThreadingLocked: """ def __init__(self): + """Initializes the decorator class for thread-safe execution of a function or method.""" self.lock = threading.Lock() def __call__(self, f): + """Run thread-safe execution of function or method.""" from functools import wraps @wraps(f) def decorated(*args, **kwargs): + """Applies thread-safety to the decorated function or method.""" with self.lock: return f(*args, **kwargs) @@ -424,8 +426,7 @@ def is_kaggle(): def is_jupyter(): """ - Check if the current script is running inside a Jupyter Notebook. - Verified on Colab, Jupyterlab, Kaggle, Paperspace. + Check if the current script is running inside a Jupyter Notebook. Verified on Colab, Jupyterlab, Kaggle, Paperspace. Returns: (bool): True if running inside a Jupyter Notebook, False otherwise. @@ -529,8 +530,8 @@ def is_github_actions_ci() -> bool: def is_git_dir(): """ - Determines whether the current file is part of a git repository. - If the current file is not part of a git repository, returns None. 
+ Determines whether the current file is part of a git repository. If the current file is not part of a git + repository, returns None. Returns: (bool): True if current file is part of a git repository. @@ -540,8 +541,8 @@ def is_git_dir(): def get_git_dir(): """ - Determines whether the current file is part of a git repository and if so, returns the repository root directory. - If the current file is not part of a git repository, returns None. + Determines whether the current file is part of a git repository and if so, returns the repository root directory. If + the current file is not part of a git repository, returns None. Returns: (Path | None): Git root directory if found or None if not found. @@ -578,7 +579,8 @@ def get_git_branch(): def get_default_args(func): - """Returns a dictionary of default arguments for a function. + """ + Returns a dictionary of default arguments for a function. Args: func (callable): The function to inspect. @@ -710,7 +712,11 @@ def remove_colorstr(input_string): class TryExcept(contextlib.ContextDecorator): - """YOLOv8 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager.""" + """ + YOLOv8 TryExcept class. + + Use as @TryExcept() decorator or 'with TryExcept():' context manager. + """ def __init__(self, msg='', verbose=True): """Initialize TryExcept class with optional message and verbosity settings.""" @@ -729,7 +735,11 @@ class TryExcept(contextlib.ContextDecorator): def threaded(func): - """Multi-threads a target function and returns thread. Usage: @threaded decorator.""" + """ + Multi-threads a target function and returns thread. + + Use as @threaded decorator. + """ def wrapper(*args, **kwargs): """Multi-threads a given function and returns the thread.""" @@ -824,6 +834,9 @@ class SettingsManager(dict): """ def __init__(self, file=SETTINGS_YAML, version='0.0.4'): + """Initialize the SettingsManager with default settings, load and validate current settings from the YAML + file. + """ import copy import hashlib diff --git a/ultralytics/utils/autobatch.py b/ultralytics/utils/autobatch.py index 4e9ed07c7c..89f7e99fc5 100644 --- a/ultralytics/utils/autobatch.py +++ b/ultralytics/utils/autobatch.py @@ -1,7 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -""" -Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch. -""" +"""Functions for estimating the best YOLO batch size to use a fraction of the available CUDA memory in PyTorch.""" from copy import deepcopy diff --git a/ultralytics/utils/benchmarks.py b/ultralytics/utils/benchmarks.py index e4135bc85b..bf86b53559 100644 --- a/ultralytics/utils/benchmarks.py +++ b/ultralytics/utils/benchmarks.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license """ -Benchmark a YOLO model formats for speed and accuracy +Benchmark a YOLO model formats for speed and accuracy. 
Usage: from ultralytics.utils.benchmarks import ProfileModels, benchmark @@ -194,6 +194,7 @@ class ProfileModels: self.device = device or torch.device(0 if torch.cuda.is_available() else 'cpu') def profile(self): + """Logs the benchmarking results of a model, checks metrics against floor and returns the results.""" files = self.get_files() if not files: @@ -235,6 +236,7 @@ class ProfileModels: return output def get_files(self): + """Returns a list of paths for all relevant model files given by the user.""" files = [] for path in self.paths: path = Path(path) @@ -250,10 +252,14 @@ class ProfileModels: return [Path(file) for file in sorted(files)] def get_onnx_model_info(self, onnx_file: str): + """Retrieves the information including number of layers, parameters, gradients and FLOPs for an ONNX model + file. + """ # return (num_layers, num_params, num_gradients, num_flops) return 0.0, 0.0, 0.0, 0.0 def iterative_sigma_clipping(self, data, sigma=2, max_iters=3): + """Applies an iterative sigma clipping algorithm to the given data times number of iterations.""" data = np.array(data) for _ in range(max_iters): mean, std = np.mean(data), np.std(data) @@ -264,6 +270,7 @@ class ProfileModels: return data def profile_tensorrt_model(self, engine_file: str, eps: float = 1e-3): + """Profiles the TensorRT model, measuring average run time and standard deviation among runs.""" if not self.trt or not Path(engine_file).is_file(): return 0.0, 0.0 @@ -292,6 +299,9 @@ class ProfileModels: return np.mean(run_times), np.std(run_times) def profile_onnx_model(self, onnx_file: str, eps: float = 1e-3): + """Profiles an ONNX model by executing it multiple times and returns the mean and standard deviation of run + times. + """ check_requirements('onnxruntime') import onnxruntime as ort @@ -344,10 +354,12 @@ class ProfileModels: return np.mean(run_times), np.std(run_times) def generate_table_row(self, model_name, t_onnx, t_engine, model_info): + """Generates a formatted string for a table row that includes model performance and metric details.""" layers, params, gradients, flops = model_info return f'| {model_name:18s} | {self.imgsz} | - | {t_onnx[0]:.2f} ± {t_onnx[1]:.2f} ms | {t_engine[0]:.2f} ± {t_engine[1]:.2f} ms | {params / 1e6:.1f} | {flops:.1f} |' def generate_results_dict(self, model_name, t_onnx, t_engine, model_info): + """Generates a dictionary of model details including name, parameters, GFLOPS and speed metrics.""" layers, params, gradients, flops = model_info return { 'model/name': model_name, @@ -357,6 +369,7 @@ class ProfileModels: 'model/speed_TensorRT(ms)': round(t_engine[0], 3)} def print_table(self, table_rows): + """Formats and prints a comparison table for different models with given statistics and performance data.""" gpu = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'GPU' header = f'| Model | size
<br><sup>(pixels) | mAP<sup>val<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>{gpu} TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>
(B) |' separator = '|-------------|---------------------|--------------------|------------------------------|-----------------------------------|------------------|-----------------|' diff --git a/ultralytics/utils/callbacks/base.py b/ultralytics/utils/callbacks/base.py index ace8bfbf63..211ae5bf7a 100644 --- a/ultralytics/utils/callbacks/base.py +++ b/ultralytics/utils/callbacks/base.py @@ -1,7 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -""" -Base callbacks -""" +"""Base callbacks.""" from collections import defaultdict from copy import deepcopy diff --git a/ultralytics/utils/callbacks/comet.py b/ultralytics/utils/callbacks/comet.py index 2da71a955f..e8016f4eb0 100644 --- a/ultralytics/utils/callbacks/comet.py +++ b/ultralytics/utils/callbacks/comet.py @@ -26,31 +26,38 @@ except (ImportError, AssertionError): def _get_comet_mode(): + """Returns the mode of comet set in the environment variables, defaults to 'online' if not set.""" return os.getenv('COMET_MODE', 'online') def _get_comet_model_name(): + """Returns the model name for Comet from the environment variable 'COMET_MODEL_NAME' or defaults to 'YOLOv8'.""" return os.getenv('COMET_MODEL_NAME', 'YOLOv8') def _get_eval_batch_logging_interval(): + """Get the evaluation batch logging interval from environment variable or use default value 1.""" return int(os.getenv('COMET_EVAL_BATCH_LOGGING_INTERVAL', 1)) def _get_max_image_predictions_to_log(): + """Get the maximum number of image predictions to log from the environment variables.""" return int(os.getenv('COMET_MAX_IMAGE_PREDICTIONS', 100)) def _scale_confidence_score(score): + """Scales the given confidence score by a factor specified in an environment variable.""" scale = float(os.getenv('COMET_MAX_CONFIDENCE_SCORE', 100.0)) return score * scale def _should_log_confusion_matrix(): + """Determines if the confusion matrix should be logged based on the environment variable settings.""" return os.getenv('COMET_EVAL_LOG_CONFUSION_MATRIX', 'false').lower() == 'true' def _should_log_image_predictions(): + """Determines whether to log image predictions based on a specified environment variable.""" return os.getenv('COMET_EVAL_LOG_IMAGE_PREDICTIONS', 'true').lower() == 'true' @@ -104,9 +111,10 @@ def _fetch_trainer_metadata(trainer): def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad): - """YOLOv8 resizes images during training and the label values - are normalized based on this resized shape. This function rescales the - bounding box labels to the original image shape. + """ + YOLOv8 resizes images during training and the label values are normalized based on this resized shape. + + This function rescales the bounding box labels to the original image shape. 
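+
+    For example (illustrative shapes only), a box annotated on a 640x640
+    letterboxed training image is mapped back to its pixel coordinates on the
+    original 1280x720 source image using the ratio_pad information.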
""" resized_image_height, resized_image_width = resized_image_shape diff --git a/ultralytics/utils/callbacks/dvc.py b/ultralytics/utils/callbacks/dvc.py index 98e232b8a6..7fa05c6b00 100644 --- a/ultralytics/utils/callbacks/dvc.py +++ b/ultralytics/utils/callbacks/dvc.py @@ -25,6 +25,7 @@ except (ImportError, AssertionError, TypeError): def _log_images(path, prefix=''): + """Logs images at specified path with an optional prefix using DVCLive.""" if live: name = path.name @@ -38,6 +39,7 @@ def _log_images(path, prefix=''): def _log_plots(plots, prefix=''): + """Logs plot images for training progress if they have not been previously processed.""" for name, params in plots.items(): timestamp = params['timestamp'] if _processed_plots.get(name) != timestamp: @@ -46,6 +48,7 @@ def _log_plots(plots, prefix=''): def _log_confusion_matrix(validator): + """Logs the confusion matrix for the given validator using DVCLive.""" targets = [] preds = [] matrix = validator.confusion_matrix.matrix @@ -62,6 +65,7 @@ def _log_confusion_matrix(validator): def on_pretrain_routine_start(trainer): + """Initializes DVCLive logger for training metadata during pre-training routine.""" try: global live live = dvclive.Live(save_dvc_exp=True, cache_images=True) @@ -71,20 +75,24 @@ def on_pretrain_routine_start(trainer): def on_pretrain_routine_end(trainer): + """Logs plots related to the training process at the end of the pretraining routine.""" _log_plots(trainer.plots, 'train') def on_train_start(trainer): + """Logs the training parameters if DVCLive logging is active.""" if live: live.log_params(trainer.args) def on_train_epoch_start(trainer): + """Sets the global variable _training_epoch value to True at the start of training each epoch.""" global _training_epoch _training_epoch = True def on_fit_epoch_end(trainer): + """Logs training metrics and model info, and advances to next step on the end of each fit epoch.""" global _training_epoch if live and _training_epoch: all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr} @@ -104,6 +112,7 @@ def on_fit_epoch_end(trainer): def on_train_end(trainer): + """Logs the best metrics, plots, and confusion matrix at the end of training if DVCLive is active.""" if live: # At the end log the best metrics. It runs validator on the best model internally. all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix='train'), **trainer.metrics, **trainer.lr} diff --git a/ultralytics/utils/callbacks/neptune.py b/ultralytics/utils/callbacks/neptune.py index 40916a3cd6..088e3f8e7a 100644 --- a/ultralytics/utils/callbacks/neptune.py +++ b/ultralytics/utils/callbacks/neptune.py @@ -31,14 +31,13 @@ def _log_images(imgs_dict, group=''): def _log_plot(title, plot_path): - """Log plots to the NeptuneAI experiment logger.""" """ - Log image as plot in the plot section of NeptuneAI + Log plots to the NeptuneAI experiment logger. - arguments: - title (str) Title of the plot - plot_path (PosixPath or str) Path to the saved image file - """ + Args: + title (str): Title of the plot. + plot_path (PosixPath | str): Path to the saved image file. 
+ """ import matplotlib.image as mpimg import matplotlib.pyplot as plt diff --git a/ultralytics/utils/callbacks/wb.py b/ultralytics/utils/callbacks/wb.py index dd2f5d320c..b901e3cc52 100644 --- a/ultralytics/utils/callbacks/wb.py +++ b/ultralytics/utils/callbacks/wb.py @@ -17,6 +17,7 @@ except (ImportError, AssertionError): def _log_plots(plots, step): + """Logs plots from the input dictionary if they haven't been logged already at the specified step.""" for name, params in plots.items(): timestamp = params['timestamp'] if _processed_plots.get(name) != timestamp: diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py index 956f1b32ab..184ce06ef6 100644 --- a/ultralytics/utils/checks.py +++ b/ultralytics/utils/checks.py @@ -64,8 +64,8 @@ def parse_requirements(file_path=ROOT.parent / 'requirements.txt', package=''): def parse_version(version='0.0.0') -> tuple: """ - Convert a version string to a tuple of integers, ignoring any extra non-numeric string attached to the version. - This function replaces deprecated 'pkg_resources.parse_version(v)' + Convert a version string to a tuple of integers, ignoring any extra non-numeric string attached to the version. This + function replaces deprecated 'pkg_resources.parse_version(v)'. Args: version (str): Version string, i.e. '2.0.1+cpu' @@ -372,8 +372,10 @@ def check_torchvision(): Checks the installed versions of PyTorch and Torchvision to ensure they're compatible. This function checks the installed versions of PyTorch and Torchvision, and warns if they're incompatible according - to the provided compatibility table based on https://github.com/pytorch/vision#installation. The - compatibility table is a dictionary where the keys are PyTorch versions and the values are lists of compatible + to the provided compatibility table based on: + https://github.com/pytorch/vision#installation. + + The compatibility table is a dictionary where the keys are PyTorch versions and the values are lists of compatible Torchvision versions. """ @@ -527,9 +529,9 @@ def collect_system_info(): def check_amp(model): """ - This function checks the PyTorch Automatic Mixed Precision (AMP) functionality of a YOLOv8 model. - If the checks fail, it means there are anomalies with AMP on the system that may cause NaN losses or zero-mAP - results, so AMP will be disabled during training. + This function checks the PyTorch Automatic Mixed Precision (AMP) functionality of a YOLOv8 model. If the checks + fail, it means there are anomalies with AMP on the system that may cause NaN losses or zero-mAP results, so AMP will + be disabled during training. Args: model (nn.Module): A YOLOv8 model instance. @@ -606,7 +608,8 @@ def print_args(args: Optional[dict] = None, show_file=True, show_func=False): def cuda_device_count() -> int: - """Get the number of NVIDIA GPUs available in the environment. + """ + Get the number of NVIDIA GPUs available in the environment. Returns: (int): The number of NVIDIA GPUs available. @@ -626,7 +629,8 @@ def cuda_device_count() -> int: def cuda_is_available() -> bool: - """Check if CUDA is available in the environment. + """ + Check if CUDA is available in the environment. Returns: (bool): True if one or more NVIDIA GPUs are available, False otherwise. diff --git a/ultralytics/utils/dist.py b/ultralytics/utils/dist.py index 11900985ac..aaef2b94c3 100644 --- a/ultralytics/utils/dist.py +++ b/ultralytics/utils/dist.py @@ -13,7 +13,8 @@ from .torch_utils import TORCH_1_9 def find_free_network_port() -> int: - """Finds a free port on localhost. 
+ """ + Finds a free port on localhost. It is useful in single-node training when we don't want to connect to a real main node but have to set the `MASTER_PORT` environment variable. diff --git a/ultralytics/utils/downloads.py b/ultralytics/utils/downloads.py index 3db1d3ec54..10983a6e7f 100644 --- a/ultralytics/utils/downloads.py +++ b/ultralytics/utils/downloads.py @@ -69,8 +69,8 @@ def delete_dsstore(path, files_to_delete=('.DS_Store', '__MACOSX')): def zip_directory(directory, compress=True, exclude=('.DS_Store', '__MACOSX'), progress=True): """ - Zips the contents of a directory, excluding files containing strings in the exclude list. - The resulting zip file is named after the directory and placed alongside it. + Zips the contents of a directory, excluding files containing strings in the exclude list. The resulting zip file is + named after the directory and placed alongside it. Args: directory (str | Path): The path to the directory to be zipped. @@ -341,7 +341,11 @@ def get_github_assets(repo='ultralytics/assets', version='latest', retry=False): def attempt_download_asset(file, repo='ultralytics/assets', release='v0.0.0'): - """Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc.""" + """ + Attempt file download from GitHub release assets if not found locally. + + release = 'latest', 'v6.2', etc. + """ from ultralytics.utils import SETTINGS # scoped for circular import # YOLOv3/5u updates diff --git a/ultralytics/utils/files.py b/ultralytics/utils/files.py index 0102c4b6a8..82e8ebfef0 100644 --- a/ultralytics/utils/files.py +++ b/ultralytics/utils/files.py @@ -30,9 +30,9 @@ class WorkingDirectory(contextlib.ContextDecorator): @contextmanager def spaces_in_path(path): """ - Context manager to handle paths with spaces in their names. - If a path contains spaces, it replaces them with underscores, copies the file/directory to the new path, - executes the context code block, then copies the file/directory back to its original location. + Context manager to handle paths with spaces in their names. If a path contains spaces, it replaces them with + underscores, copies the file/directory to the new path, executes the context code block, then copies the + file/directory back to its original location. Args: path (str | Path): The original path. diff --git a/ultralytics/utils/instance.py b/ultralytics/utils/instance.py index 4e2e438098..28f1f65470 100644 --- a/ultralytics/utils/instance.py +++ b/ultralytics/utils/instance.py @@ -32,9 +32,14 @@ __all__ = 'Bboxes', # tuple or list class Bboxes: - """Bounding Boxes class. Only numpy variables are supported.""" + """ + Bounding Boxes class. + + Only numpy variables are supported. 
+ """ def __init__(self, bboxes, format='xyxy') -> None: + """Initializes the Bboxes class with bounding box data in a specified format.""" assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}' bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes assert bboxes.ndim == 2 @@ -194,7 +199,7 @@ class Instances: return self._bboxes.areas() def scale(self, scale_w, scale_h, bbox_only=False): - """this might be similar with denormalize func but without normalized sign.""" + """This might be similar with denormalize func but without normalized sign.""" self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h)) if bbox_only: return @@ -307,7 +312,11 @@ class Instances: self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h) def remove_zero_area_boxes(self): - """Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height. This removes them.""" + """ + Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height. + + This removes them. + """ good = self.bbox_areas > 0 if not all(good): self._bboxes = self._bboxes[good] diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py index dacf326f96..62186678dd 100644 --- a/ultralytics/utils/loss.py +++ b/ultralytics/utils/loss.py @@ -13,7 +13,11 @@ from .tal import bbox2dist class VarifocalLoss(nn.Module): - """Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367.""" + """ + Varifocal loss by Zhang et al. + + https://arxiv.org/abs/2008.13367. + """ def __init__(self): """Initialize the VarifocalLoss class.""" @@ -33,6 +37,7 @@ class FocalLoss(nn.Module): """Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5).""" def __init__(self, ): + """Initializer for FocalLoss class with no parameters.""" super().__init__() @staticmethod @@ -93,6 +98,7 @@ class KeypointLoss(nn.Module): """Criterion class for computing training losses.""" def __init__(self, sigmas) -> None: + """Initialize the KeypointLoss class.""" super().__init__() self.sigmas = sigmas diff --git a/ultralytics/utils/metrics.py b/ultralytics/utils/metrics.py index 574d403913..36957e9fe9 100644 --- a/ultralytics/utils/metrics.py +++ b/ultralytics/utils/metrics.py @@ -1,7 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -""" -Model validation metrics -""" +"""Model validation metrics.""" import math import warnings @@ -195,7 +193,7 @@ class ConfusionMatrix: def process_cls_preds(self, preds, targets): """ - Update confusion matrix for classification task + Update confusion matrix for classification task. Args: preds (Array[N, min(nc,5)]): Predicted class labels. @@ -308,9 +306,7 @@ class ConfusionMatrix: on_plot(plot_fname) def print(self): - """ - Print the confusion matrix to the console. - """ + """Print the confusion matrix to the console.""" for i in range(self.nc + 1): LOGGER.info(' '.join(map(str, self.matrix[i]))) @@ -440,7 +436,6 @@ def ap_per_class(tp, f1 (np.ndarray): F1-score values at each confidence threshold. ap (np.ndarray): Average precision for each class at different IoU thresholds. unique_classes (np.ndarray): An array of unique classes that have data. - """ # Sort by objectness @@ -498,32 +493,33 @@ def ap_per_class(tp, class Metric(SimpleClass): """ - Class for computing evaluation metrics for YOLOv8 model. - - Attributes: - p (list): Precision for each class. Shape: (nc,). - r (list): Recall for each class. Shape: (nc,). - f1 (list): F1 score for each class. Shape: (nc,). 
- all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10). - ap_class_index (list): Index of class for each AP score. Shape: (nc,). - nc (int): Number of classes. - - Methods: - ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or []. - ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or []. - mp(): Mean precision of all classes. Returns: Float. - mr(): Mean recall of all classes. Returns: Float. - map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float. - map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float. - map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float. - mean_results(): Mean of results, returns mp, mr, map50, map. - class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i]. - maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,). - fitness(): Model fitness as a weighted combination of metrics. Returns: Float. - update(results): Update metric attributes with new evaluation results. - """ + Class for computing evaluation metrics for YOLOv8 model. + + Attributes: + p (list): Precision for each class. Shape: (nc,). + r (list): Recall for each class. Shape: (nc,). + f1 (list): F1 score for each class. Shape: (nc,). + all_ap (list): AP scores for all classes and all IoU thresholds. Shape: (nc, 10). + ap_class_index (list): Index of class for each AP score. Shape: (nc,). + nc (int): Number of classes. + + Methods: + ap50(): AP at IoU threshold of 0.5 for all classes. Returns: List of AP scores. Shape: (nc,) or []. + ap(): AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: List of AP scores. Shape: (nc,) or []. + mp(): Mean precision of all classes. Returns: Float. + mr(): Mean recall of all classes. Returns: Float. + map50(): Mean AP at IoU threshold of 0.5 for all classes. Returns: Float. + map75(): Mean AP at IoU threshold of 0.75 for all classes. Returns: Float. + map(): Mean AP at IoU thresholds from 0.5 to 0.95 for all classes. Returns: Float. + mean_results(): Mean of results, returns mp, mr, map50, map. + class_result(i): Class-aware result, returns p[i], r[i], ap50[i], ap[i]. + maps(): mAP of each class. Returns: Array of mAP scores, shape: (nc,). + fitness(): Model fitness as a weighted combination of metrics. Returns: Float. + update(results): Update metric attributes with new evaluation results. 
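+
+    Example (illustrative; `results` is the evaluation tuple produced by ap_per_class):
+        >>> metric = Metric()
+        >>> metric.update(results)
+        >>> mp, mr, map50, mean_ap = metric.mean_results()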
+ """ def __init__(self) -> None: + """Initializes a Metric instance for computing evaluation metrics for the YOLOv8 model.""" self.p = [] # (nc, ) self.r = [] # (nc, ) self.f1 = [] # (nc, ) @@ -606,12 +602,12 @@ class Metric(SimpleClass): return [self.mp, self.mr, self.map50, self.map] def class_result(self, i): - """class-aware result, return p[i], r[i], ap50[i], ap[i].""" + """Class-aware result, return p[i], r[i], ap50[i], ap[i].""" return self.p[i], self.r[i], self.ap50[i], self.ap[i] @property def maps(self): - """mAP of each class.""" + """MAP of each class.""" maps = np.zeros(self.nc) + self.map for i, c in enumerate(self.ap_class_index): maps[c] = self.ap[i] @@ -672,6 +668,7 @@ class DetMetrics(SimpleClass): """ def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None: + """Initialize a DetMetrics instance with a save directory, plot flag, callback function, and class names.""" self.save_dir = save_dir self.plot = plot self.on_plot = on_plot @@ -756,6 +753,7 @@ class SegmentMetrics(SimpleClass): """ def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None: + """Initialize a SegmentMetrics instance with a save directory, plot flag, callback function, and class names.""" self.save_dir = save_dir self.plot = plot self.on_plot = on_plot @@ -865,6 +863,7 @@ class PoseMetrics(SegmentMetrics): """ def __init__(self, save_dir=Path('.'), plot=False, on_plot=None, names=()) -> None: + """Initialize the PoseMetrics class with directory path, class names, and plotting options.""" super().__init__(save_dir, plot, names) self.save_dir = save_dir self.plot = plot @@ -954,6 +953,7 @@ class ClassifyMetrics(SimpleClass): """ def __init__(self) -> None: + """Initialize a ClassifyMetrics instance.""" self.top1 = 0 self.top5 = 0 self.speed = {'preprocess': 0.0, 'inference': 0.0, 'loss': 0.0, 'postprocess': 0.0} diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py index 9089d0fa0f..5d37c59134 100644 --- a/ultralytics/utils/ops.py +++ b/ultralytics/utils/ops.py @@ -50,6 +50,7 @@ class Profile(contextlib.ContextDecorator): self.t += self.dt # accumulate dt def __str__(self): + """Returns a human-readable string representing the accumulated elapsed time in the profiler.""" return f'Elapsed time is {self.t} s' def time(self): @@ -303,7 +304,7 @@ def clip_coords(coords, shape): def scale_image(masks, im0_shape, ratio_pad=None): """ - Takes a mask, and resizes it to the original image size + Takes a mask, and resizes it to the original image size. Args: masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3]. @@ -403,8 +404,8 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): """ - Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. - x, y, width and height are normalized to image dimensions + Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y, + width and height are normalized to image dimensions. Args: x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format. @@ -445,7 +446,7 @@ def xywh2ltwh(x): def xyxy2ltwh(x): """ - Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right + Convert nx4 bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h], where xy1=top-left, xy2=bottom-right. 
Args: x (np.ndarray | torch.Tensor): The input tensor with the bounding boxes coordinates in the xyxy format @@ -461,7 +462,7 @@ def xyxy2ltwh(x): def ltwh2xywh(x): """ - Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center + Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center. Args: x (torch.Tensor): the input tensor @@ -544,7 +545,7 @@ def xywhr2xyxyxyxy(center): def ltwh2xyxy(x): """ - It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right. Args: x (np.ndarray | torch.Tensor): the input image @@ -616,8 +617,8 @@ def crop_mask(masks, boxes): def process_mask_upsample(protos, masks_in, bboxes, shape): """ - Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher - quality but is slower. + Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality + but is slower. Args: protos (torch.Tensor): [mask_dim, mask_h, mask_w] @@ -713,7 +714,7 @@ def scale_masks(masks, shape, padding=True): def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize=False, padding=True): """ - Rescale segment coordinates (xy) from img1_shape to img0_shape + Rescale segment coordinates (xy) from img1_shape to img0_shape. Args: img1_shape (tuple): The shape of the image that the coords are from. diff --git a/ultralytics/utils/patches.py b/ultralytics/utils/patches.py index a145763947..541cf45a40 100644 --- a/ultralytics/utils/patches.py +++ b/ultralytics/utils/patches.py @@ -1,7 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -""" -Monkey patches to update/extend functionality of existing functions -""" +"""Monkey patches to update/extend functionality of existing functions.""" from pathlib import Path @@ -14,7 +12,8 @@ _imshow = cv2.imshow # copy to avoid recursion errors def imread(filename: str, flags: int = cv2.IMREAD_COLOR): - """Read an image from a file. + """ + Read an image from a file. Args: filename (str): Path to the file to read. @@ -27,7 +26,8 @@ def imread(filename: str, flags: int = cv2.IMREAD_COLOR): def imwrite(filename: str, img: np.ndarray, params=None): - """Write an image to a file. + """ + Write an image to a file. Args: filename (str): Path to the file to write. @@ -45,7 +45,8 @@ def imwrite(filename: str, img: np.ndarray, params=None): def imshow(winname: str, mat: np.ndarray): - """Displays an image in the specified window. + """ + Displays an image in the specified window. Args: winname (str): Name of the window. @@ -59,7 +60,8 @@ _torch_save = torch.save # copy to avoid recursion errors def torch_save(*args, **kwargs): - """Use dill (if exists) to serialize the lambda functions where pickle does not do this. + """ + Use dill (if exists) to serialize the lambda functions where pickle does not do this. Args: *args (tuple): Positional arguments to pass to torch.save. diff --git a/ultralytics/utils/plotting.py b/ultralytics/utils/plotting.py index bfd2aaa1da..88fb73c2ac 100644 --- a/ultralytics/utils/plotting.py +++ b/ultralytics/utils/plotting.py @@ -316,7 +316,8 @@ def plot_labels(boxes, cls, names=(), save_dir=Path(''), on_plot=None): def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True): - """Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop. 
+ """ + Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop. This function takes a bounding box and an image, and then saves a cropped portion of the image according to the bounding box. Optionally, the crop can be squared, and the function allows for gain and padding diff --git a/ultralytics/utils/torch_utils.py b/ultralytics/utils/torch_utils.py index 0ea740887a..ea6d7a6b18 100644 --- a/ultralytics/utils/torch_utils.py +++ b/ultralytics/utils/torch_utils.py @@ -205,7 +205,11 @@ def fuse_deconv_and_bn(deconv, bn): def model_info(model, detailed=False, verbose=True, imgsz=640): - """Model information. imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320].""" + """ + Model information. + + imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320]. + """ if not verbose: return n_p = get_num_params(model) # number of parameters @@ -517,13 +521,11 @@ def profile(input, ops, n=10, device=None): class EarlyStopping: - """ - Early stopping class that stops training when a specified number of epochs have passed without improvement. - """ + """Early stopping class that stops training when a specified number of epochs have passed without improvement.""" def __init__(self, patience=50): """ - Initialize early stopping object + Initialize early stopping object. Args: patience (int, optional): Number of epochs to wait after fitness stops improving before stopping. @@ -535,7 +537,7 @@ class EarlyStopping: def __call__(self, epoch, fitness): """ - Check whether to stop training + Check whether to stop training. Args: epoch (int): Current epoch of training diff --git a/ultralytics/utils/triton.py b/ultralytics/utils/triton.py index c48e418aad..45bb6e5b67 100644 --- a/ultralytics/utils/triton.py +++ b/ultralytics/utils/triton.py @@ -7,7 +7,8 @@ import numpy as np class TritonRemoteModel: - """Client for interacting with a remote Triton Inference Server model. + """ + Client for interacting with a remote Triton Inference Server model. Attributes: endpoint (str): The name of the model on the Triton server.