From 291883a23fc5c98aef522d0f17c7223a12cafba6 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Sun, 21 Jul 2024 20:14:49 +0200
Subject: [PATCH] Engine Model and Results Docs improvements (#14564)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant
Co-authored-by: Felipe Parodi
---
 docs/build_reference.py | 5 +-
 docs/en/reference/cfg/__init__.md | 30 +-
 docs/en/reference/data/annotator.md | 2 +-
 docs/en/reference/data/augment.md | 38 +-
 docs/en/reference/data/base.md | 2 +-
 docs/en/reference/data/build.md | 16 +-
 docs/en/reference/data/converter.md | 14 +-
 docs/en/reference/data/dataset.md | 12 +-
 docs/en/reference/data/explorer/explorer.md | 4 +-
 docs/en/reference/data/explorer/gui/dash.md | 22 +-
 docs/en/reference/data/explorer/utils.md | 10 +-
 docs/en/reference/data/loaders.md | 16 +-
 docs/en/reference/data/split_dota.md | 16 +-
 docs/en/reference/data/utils.md | 32 +-
 docs/en/reference/engine/exporter.md | 10 +-
 docs/en/reference/engine/model.md | 2 +-
 docs/en/reference/engine/predictor.md | 2 +-
 docs/en/reference/engine/results.md | 14 +-
 docs/en/reference/engine/trainer.md | 2 +-
 docs/en/reference/engine/tuner.md | 2 +-
 docs/en/reference/engine/validator.md | 2 +-
 docs/en/reference/hub/__init__.md | 14 +-
 docs/en/reference/hub/auth.md | 2 +-
 docs/en/reference/hub/session.md | 2 +-
 docs/en/reference/hub/utils.md | 8 +-
 docs/en/reference/models/fastsam/model.md | 2 +-
 docs/en/reference/models/fastsam/predict.md | 2 +-
 docs/en/reference/models/fastsam/prompt.md | 2 +-
 docs/en/reference/models/fastsam/utils.md | 4 +-
 docs/en/reference/models/fastsam/val.md | 2 +-
 docs/en/reference/models/nas/model.md | 2 +-
 docs/en/reference/models/nas/predict.md | 2 +-
 docs/en/reference/models/nas/val.md | 2 +-
 docs/en/reference/models/rtdetr/model.md | 2 +-
 docs/en/reference/models/rtdetr/predict.md | 2 +-
 docs/en/reference/models/rtdetr/train.md | 2 +-
 docs/en/reference/models/rtdetr/val.md | 4 +-
 docs/en/reference/models/sam/amg.md | 22 +-
 docs/en/reference/models/sam/build.md | 12 +-
 docs/en/reference/models/sam/model.md | 2 +-
 .../reference/models/sam/modules/decoders.md | 4 +-
 .../reference/models/sam/modules/encoders.md | 20 +-
 docs/en/reference/models/sam/modules/sam.md | 2 +-
 .../models/sam/modules/tiny_encoder.md | 22 +-
 .../models/sam/modules/transformer.md | 6 +-
 docs/en/reference/models/sam/predict.md | 2 +-
 docs/en/reference/models/utils/loss.md | 4 +-
 docs/en/reference/models/utils/ops.md | 4 +-
 .../reference/models/yolo/classify/predict.md | 2 +-
 .../reference/models/yolo/classify/train.md | 2 +-
 docs/en/reference/models/yolo/classify/val.md | 2 +-
 .../reference/models/yolo/detect/predict.md | 2 +-
 docs/en/reference/models/yolo/detect/train.md | 2 +-
 docs/en/reference/models/yolo/detect/val.md | 2 +-
 docs/en/reference/models/yolo/model.md | 4 +-
 docs/en/reference/models/yolo/obb/predict.md | 2 +-
 docs/en/reference/models/yolo/obb/train.md | 2 +-
 docs/en/reference/models/yolo/obb/val.md | 2 +-
 docs/en/reference/models/yolo/pose/predict.md | 2 +-
 docs/en/reference/models/yolo/pose/train.md | 2 +-
 docs/en/reference/models/yolo/pose/val.md | 2 +-
 .../reference/models/yolo/segment/predict.md | 2 +-
 .../en/reference/models/yolo/segment/train.md | 2 +-
 docs/en/reference/models/yolo/segment/val.md | 2 +-
 docs/en/reference/models/yolo/world/train.md | 4 +-
 .../models/yolo/world/train_world.md | 2 +-
 docs/en/reference/nn/autobackend.md | 6 +-
 docs/en/reference/nn/modules/block.md | 78 +-
 docs/en/reference/nn/modules/conv.md | 28 +-
 docs/en/reference/nn/modules/head.md | 16 +-
 docs/en/reference/nn/modules/transformer.md | 20 +-
 docs/en/reference/nn/modules/utils.md | 10 +-
 docs/en/reference/nn/tasks.md | 34 +-
 docs/en/reference/solutions/ai_gym.md | 2 +-
 docs/en/reference/solutions/analytics.md | 2 +-
 .../solutions/distance_calculation.md | 2 +-
 docs/en/reference/solutions/heatmap.md | 2 +-
 docs/en/reference/solutions/object_counter.md | 2 +-
 .../reference/solutions/parking_management.md | 4 +-
 .../reference/solutions/queue_management.md | 2 +-
 .../reference/solutions/speed_estimation.md | 2 +-
 .../solutions/streamlit_inference.md | 2 +-
 docs/en/reference/trackers/basetrack.md | 4 +-
 docs/en/reference/trackers/bot_sort.md | 4 +-
 docs/en/reference/trackers/byte_tracker.md | 4 +-
 docs/en/reference/trackers/track.md | 6 +-
 docs/en/reference/trackers/utils/gmc.md | 2 +-
 .../reference/trackers/utils/kalman_filter.md | 4 +-
 docs/en/reference/trackers/utils/matching.md | 8 +-
 docs/en/reference/utils/__init__.md | 80 +-
 docs/en/reference/utils/autobatch.md | 4 +-
 docs/en/reference/utils/benchmarks.md | 6 +-
 docs/en/reference/utils/callbacks/base.md | 54 +-
 docs/en/reference/utils/callbacks/clearml.md | 14 +-
 docs/en/reference/utils/callbacks/comet.md | 48 +-
 docs/en/reference/utils/callbacks/dvc.md | 18 +-
 docs/en/reference/utils/callbacks/hub.md | 18 +-
 docs/en/reference/utils/callbacks/mlflow.md | 10 +-
 docs/en/reference/utils/callbacks/neptune.md | 16 +-
 docs/en/reference/utils/callbacks/raytune.md | 2 +-
 .../reference/utils/callbacks/tensorboard.md | 12 +-
 docs/en/reference/utils/callbacks/wb.md | 14 +-
 docs/en/reference/utils/checks.md | 50 +-
 docs/en/reference/utils/dist.md | 8 +-
 docs/en/reference/utils/downloads.md | 20 +-
 docs/en/reference/utils/errors.md | 2 +-
 docs/en/reference/utils/files.md | 16 +-
 docs/en/reference/utils/instance.md | 6 +-
 docs/en/reference/utils/loss.md | 24 +-
 docs/en/reference/utils/metrics.md | 42 +-
 docs/en/reference/utils/ops.md | 60 +-
 docs/en/reference/utils/patches.md | 8 +-
 docs/en/reference/utils/plotting.md | 22 +-
 docs/en/reference/utils/tal.md | 12 +-
 docs/en/reference/utils/torch_utils.md | 56 +-
 docs/en/reference/utils/triton.md | 2 +-
 docs/en/reference/utils/tuner.md | 2 +-
 .../YOLOv8-SAHI-Inference-Video/readme.md | 2 +-
 mkdocs.yml | 6 +
 ultralytics/cfg/__init__.py | 233 ++-
 ultralytics/data/augment.py | 1669 +++++++++++++++--
 ultralytics/engine/model.py | 659 +++++--
 ultralytics/engine/results.py | 1291 ++++++++++---
 123 files changed, 3799 insertions(+), 1378 deletions(-)

diff --git a/docs/build_reference.py b/docs/build_reference.py
index 6de5a5dbd6..77f9e7baaf 100644
--- a/docs/build_reference.py
+++ b/docs/build_reference.py
@@ -63,8 +63,9 @@ def create_markdown(py_filepath: Path, module_path: str, classes: list, function
         f"    This file is available at [{pretty}]({url}). If you spot a problem please help fix it by [contributing]"
         f"(https://docs.ultralytics.com/help/contributing/) a [Pull Request]({edit}) 🛠️. Thank you 🙏!\n\n"
     )
-    md_content = ["<br><br>\n"] + [f"## ::: {module_name}.{class_name}\n\n<br><br>\n" for class_name in classes]
-    md_content.extend(f"## ::: {module_name}.{func_name}\n\n<br><br>\n" for func_name in functions)
+    md_content = ["<br>\n"] + [f"## ::: {module_name}.{class_name}\n\n<br><br><hr><br>\n" for class_name in classes]
+    md_content.extend(f"## ::: {module_name}.{func_name}\n\n<br><br><hr><br>\n" for func_name in functions)
+    md_content[-1] = md_content[-1].replace("<hr><br>", "")  # remove last horizontal line
     md_content = header_content + title_content + "\n".join(md_content)
     if not md_content.endswith("\n"):
         md_content += "\n"
diff --git a/docs/en/reference/cfg/__init__.md b/docs/en/reference/cfg/__init__.md
index 85af8a626c..48ed75caa5 100644
--- a/docs/en/reference/cfg/__init__.md
+++ b/docs/en/reference/cfg/__init__.md
@@ -9,63 +9,63 @@ keywords: Ultralytics, YOLO, configuration, cfg2dict, get_cfg, check_cfg, save_d
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/\_\_init\_\_.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/__init__.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/cfg/__init__.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.cfg.cfg2dict
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.get_cfg
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.check_cfg
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.get_save_dir
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg._handle_deprecation
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.check_dict_alignment
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.merge_equals_args
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.handle_yolo_hub
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.handle_yolo_settings
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.handle_explorer
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.handle_streamlit_inference
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.parse_key_value_pair
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.smart_value
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.entrypoint
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.cfg.copy_default_cfg
 
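Editor's note: the `docs/build_reference.py` hunk above is what produces every Markdown diff in this patch, including the `cfg/__init__.md` one just shown. The generator now emits a single `<br>` after the page intro, a `<br><br><hr><br>` divider after each documented class or function, and then strips the `<hr><br>` from the last entry so pages do not end with a dangling horizontal rule. A minimal standalone sketch of that logic follows; the module and entry names here are illustrative examples, not taken from the patch:

```python
# Sketch of the new separator logic from build_reference.py; the names below
# are hypothetical examples chosen only to make the script runnable.
module_name = "ultralytics.cfg"
classes = []  # class names discovered in the module
functions = ["cfg2dict", "get_cfg", "copy_default_cfg"]

# One "<br>" after the page intro, then "<br><br><hr><br>" after every entry.
md_content = ["<br>\n"] + [f"## ::: {module_name}.{c}\n\n<br><br><hr><br>\n" for c in classes]
md_content.extend(f"## ::: {module_name}.{fn}\n\n<br><br><hr><br>\n" for fn in functions)
# Drop the horizontal rule from the final entry only.
md_content[-1] = md_content[-1].replace("<hr><br>", "")

print("\n".join(md_content))
```

Running this prints each heading followed by `<br><br><hr><br>`, with the last heading followed by a plain `<br><br>`, which matches the `-<br><br>` → `+<br><br><hr><br>` substitutions repeated in the per-page diffs above and below.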
diff --git a/docs/en/reference/data/annotator.md b/docs/en/reference/data/annotator.md
index e746a32f61..02ef04e04b 100644
--- a/docs/en/reference/data/annotator.md
+++ b/docs/en/reference/data/annotator.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, image annotation, YOLO, SAM, Python script, GitHub, objec
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/annotator.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/annotator.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/annotator.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.data.annotator.auto_annotate
 
diff --git a/docs/en/reference/data/augment.md b/docs/en/reference/data/augment.md
index 3dfcdcda18..951141b9cf 100644
--- a/docs/en/reference/data/augment.md
+++ b/docs/en/reference/data/augment.md
@@ -9,79 +9,79 @@ keywords: Ultralytics, image augmentation, MixUp, Mosaic, Random Perspective, de
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/augment.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/augment.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/augment.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.data.augment.BaseTransform
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.Compose
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.BaseMixTransform
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.Mosaic
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.MixUp
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.RandomPerspective
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.RandomHSV
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.RandomFlip
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.LetterBox
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.CopyPaste
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.Albumentations
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.Format
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.RandomLoadText
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.ClassifyLetterBox
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.CenterCrop
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.ToTensor
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.v8_transforms
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.classify_transforms
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.augment.classify_augmentations
 
diff --git a/docs/en/reference/data/base.md b/docs/en/reference/data/base.md
index f78159dc82..ddb9eb703c 100644
--- a/docs/en/reference/data/base.md
+++ b/docs/en/reference/data/base.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, BaseDataset, image processing, data augmentation, YOLO, d
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/base.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/base.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/base.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.data.base.BaseDataset
 
diff --git a/docs/en/reference/data/build.md b/docs/en/reference/data/build.md
index 7f78aa4d6d..bae38c9e27 100644
--- a/docs/en/reference/data/build.md
+++ b/docs/en/reference/data/build.md
@@ -9,35 +9,35 @@ keywords: Ultralytics, Data Builders, InfiniteDataLoader, YOLO dataset, build.py
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/build.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/build.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/build.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.data.build.InfiniteDataLoader
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.build._RepeatSampler
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.build.seed_worker
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.build.build_yolo_dataset
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.build.build_grounding
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.build.build_dataloader
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.build.check_source
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.build.load_inference_source
 
diff --git a/docs/en/reference/data/converter.md b/docs/en/reference/data/converter.md
index 89061cdef7..d1b63f2f0e 100644
--- a/docs/en/reference/data/converter.md
+++ b/docs/en/reference/data/converter.md
@@ -9,31 +9,31 @@ keywords: Ultralytics, data conversion, YOLO models, COCO, DOTA, YOLO bbox2segme
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/converter.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/converter.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/converter.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.data.converter.coco91_to_coco80_class
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.converter.coco80_to_coco91_class
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.converter.convert_coco
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.converter.convert_dota_to_yolo_obb
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.converter.min_index
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.converter.merge_multi_segment
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.converter.yolo_bbox2segment
 
diff --git a/docs/en/reference/data/dataset.md b/docs/en/reference/data/dataset.md
index 2c6b50364d..8038780bf1 100644
--- a/docs/en/reference/data/dataset.md
+++ b/docs/en/reference/data/dataset.md
@@ -9,27 +9,27 @@ keywords: Ultralytics, YOLODataset, object detection, segmentation, dataset load
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/dataset.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/dataset.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/dataset.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.data.dataset.YOLODataset
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.dataset.YOLOMultiModalDataset
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.dataset.GroundingDataset
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.dataset.YOLOConcatDataset
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.dataset.SemanticDataset
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.dataset.ClassificationDataset
 
diff --git a/docs/en/reference/data/explorer/explorer.md b/docs/en/reference/data/explorer/explorer.md
index a32a4db05d..65e6426137 100644
--- a/docs/en/reference/data/explorer/explorer.md
+++ b/docs/en/reference/data/explorer/explorer.md
@@ -10,11 +10,11 @@ keywords: Ultralytics, YOLO, data explorer, image querying, embeddings, similari
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/explorer.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/explorer.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/explorer/explorer.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.data.explorer.explorer.ExplorerDataset
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.explorer.Explorer
 
diff --git a/docs/en/reference/data/explorer/gui/dash.md b/docs/en/reference/data/explorer/gui/dash.md
index e18333a31a..68a2fa46d1 100644
--- a/docs/en/reference/data/explorer/gui/dash.md
+++ b/docs/en/reference/data/explorer/gui/dash.md
@@ -10,47 +10,47 @@ keywords: Ultralytics, Explorer, GUI, dash, documentation, data explorer, AI que
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/gui/dash.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/gui/dash.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/explorer/gui/dash.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.data.explorer.gui.dash._get_explorer
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.gui.dash.init_explorer_form
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.gui.dash.query_form
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.gui.dash.ai_query_form
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.gui.dash.find_similar_imgs
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.gui.dash.similarity_form
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.gui.dash.run_sql_query
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.gui.dash.run_ai_query
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.gui.dash.reset_explorer
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.gui.dash.utralytics_explorer_docs_callback
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.gui.dash.layout
 
diff --git a/docs/en/reference/data/explorer/utils.md b/docs/en/reference/data/explorer/utils.md
index a08969a708..e8f56c01e8 100644
--- a/docs/en/reference/data/explorer/utils.md
+++ b/docs/en/reference/data/explorer/utils.md
@@ -10,23 +10,23 @@ keywords: Ultralytics, data explorer, utils, schema, sanitize batch, plot query
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/explorer/utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/explorer/utils.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.data.explorer.utils.get_table_schema
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.utils.get_sim_index_schema
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.utils.sanitize_batch
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.utils.plot_query_result
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.explorer.utils.prompt_sql_query
 
diff --git a/docs/en/reference/data/loaders.md b/docs/en/reference/data/loaders.md
index a26dc277f5..2925a88332 100644
--- a/docs/en/reference/data/loaders.md
+++ b/docs/en/reference/data/loaders.md
@@ -9,35 +9,35 @@ keywords: Ultralytics, data loaders, SourceTypes, LoadStreams, LoadScreenshots,
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/loaders.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/loaders.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/loaders.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.data.loaders.SourceTypes
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.loaders.LoadStreams
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.loaders.LoadScreenshots
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.loaders.LoadImagesAndVideos
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.loaders.LoadPilAndNumpy
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.loaders.LoadTensor
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.loaders.autocast_list
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.loaders.get_best_youtube_url
 
diff --git a/docs/en/reference/data/split_dota.md b/docs/en/reference/data/split_dota.md
index 9bbf79ec4d..27d448411b 100644
--- a/docs/en/reference/data/split_dota.md
+++ b/docs/en/reference/data/split_dota.md
@@ -9,35 +9,35 @@ keywords: Ultralytics, DOTA dataset, data splitting, YOLO, Python, bbox_iof, loa
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/split_dota.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/split_dota.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/split_dota.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.data.split_dota.bbox_iof
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.split_dota.load_yolo_dota
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.split_dota.get_windows
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.split_dota.get_window_obj
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.split_dota.crop_and_save
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.split_dota.split_images_and_labels
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.split_dota.split_trainval
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.split_dota.split_test
 
diff --git a/docs/en/reference/data/utils.md b/docs/en/reference/data/utils.md
index afccca25db..07474265bc 100644
--- a/docs/en/reference/data/utils.md
+++ b/docs/en/reference/data/utils.md
@@ -9,67 +9,67 @@ keywords: Ultralytics, dataset utils, data handling, image verification, Python,
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/data/utils.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.data.utils.HUBDatasetStats
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.img2label_paths
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.get_hash
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.exif_size
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.verify_image
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.verify_image_label
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.polygon2mask
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.polygons2masks
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.polygons2masks_overlap
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.find_dataset_yaml
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.check_det_dataset
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.check_cls_dataset
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.compress_one_image
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.autosplit
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.load_dataset_cache_file
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.data.utils.save_dataset_cache_file
 
diff --git a/docs/en/reference/engine/exporter.md b/docs/en/reference/engine/exporter.md
index 8e1b4acfe8..b6354dec9d 100644
--- a/docs/en/reference/engine/exporter.md
+++ b/docs/en/reference/engine/exporter.md
@@ -9,23 +9,23 @@ keywords: YOLOv8, export formats, ONNX, TensorRT, CoreML, machine learning model
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/exporter.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/exporter.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/exporter.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.engine.exporter.Exporter
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.engine.exporter.IOSDetectModel
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.engine.exporter.export_formats
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.engine.exporter.gd_outputs
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.engine.exporter.try_export
 
diff --git a/docs/en/reference/engine/model.md b/docs/en/reference/engine/model.md
index 66ea1f2c98..d8001a63c9 100644
--- a/docs/en/reference/engine/model.md
+++ b/docs/en/reference/engine/model.md
@@ -9,7 +9,7 @@ keywords: YOLO model, Ultralytics, machine learning, deep learning, PyTorch mode
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/model.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/model.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/model.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.engine.model.Model
 
diff --git a/docs/en/reference/engine/predictor.md b/docs/en/reference/engine/predictor.md
index a381f9d127..12e6bb7cec 100644
--- a/docs/en/reference/engine/predictor.md
+++ b/docs/en/reference/engine/predictor.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, Base Predictor, image inference, video inference, m
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/predictor.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/predictor.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/predictor.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.engine.predictor.BasePredictor
 
diff --git a/docs/en/reference/engine/results.md b/docs/en/reference/engine/results.md
index 2f14b7aff5..1dab8064b6 100644
--- a/docs/en/reference/engine/results.md
+++ b/docs/en/reference/engine/results.md
@@ -9,31 +9,31 @@ keywords: Ultralytics, engine results, BaseTensor, Results class, Boxes, Masks,
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/results.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/results.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/results.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.engine.results.BaseTensor
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.engine.results.Results
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.engine.results.Boxes
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.engine.results.Masks
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.engine.results.Keypoints
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.engine.results.Probs
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.engine.results.OBB
 
diff --git a/docs/en/reference/engine/trainer.md b/docs/en/reference/engine/trainer.md
index a63dea62a4..d8d69f6929 100644
--- a/docs/en/reference/engine/trainer.md
+++ b/docs/en/reference/engine/trainer.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, BaseTrainer, model training, configuration, dataset
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/trainer.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/trainer.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/trainer.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.engine.trainer.BaseTrainer
 
diff --git a/docs/en/reference/engine/tuner.md b/docs/en/reference/engine/tuner.md
index 358f730e05..bc88c8ebff 100644
--- a/docs/en/reference/engine/tuner.md
+++ b/docs/en/reference/engine/tuner.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, hyperparameter tuning, machine learning, deep learn
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/tuner.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/tuner.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/tuner.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.engine.tuner.Tuner
 
diff --git a/docs/en/reference/engine/validator.md b/docs/en/reference/engine/validator.md
index 05ed6bf0fb..aebcaf8645 100644
--- a/docs/en/reference/engine/validator.md
+++ b/docs/en/reference/engine/validator.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, BaseValidator, model validation, PyTorch, TensorFlow, ONN
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/validator.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/engine/validator.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/engine/validator.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.engine.validator.BaseValidator
 
diff --git a/docs/en/reference/hub/__init__.md b/docs/en/reference/hub/__init__.md
index f68de0bbf9..697bb05098 100644
--- a/docs/en/reference/hub/__init__.md
+++ b/docs/en/reference/hub/__init__.md
@@ -9,31 +9,31 @@ keywords: Ultralytics HUB API, login, logout, reset model, export model, check d
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/\_\_init\_\_.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/__init__.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/hub/__init__.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.hub.login
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.hub.logout
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.hub.reset_model
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.hub.export_fmts_hub
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.hub.export_model
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.hub.get_export
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.hub.check_dataset
 
diff --git a/docs/en/reference/hub/auth.md b/docs/en/reference/hub/auth.md
index e641bd4310..dac368c020 100644
--- a/docs/en/reference/hub/auth.md
+++ b/docs/en/reference/hub/auth.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, authentication, API key, cookies, Auth class, YOLO, API,
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/auth.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/auth.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/hub/auth.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.hub.auth.Auth
 
diff --git a/docs/en/reference/hub/session.md b/docs/en/reference/hub/session.md
index 05bc875f2c..d9cadf4ebd 100644
--- a/docs/en/reference/hub/session.md
+++ b/docs/en/reference/hub/session.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, HUBTrainingSession, model training, heartbeats, che
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/session.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/session.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/hub/session.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.hub.session.HUBTrainingSession
 
diff --git a/docs/en/reference/hub/utils.md b/docs/en/reference/hub/utils.md
index d7f8a9d6cd..d7e439a9ea 100644
--- a/docs/en/reference/hub/utils.md
+++ b/docs/en/reference/hub/utils.md
@@ -9,19 +9,19 @@ keywords: Ultralytics, HUB, Utilities, YOLO, smart_request, request_with_credent
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/hub/utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/hub/utils.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.hub.utils.Events
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.hub.utils.request_with_credentials
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.hub.utils.requests_with_progress
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.hub.utils.smart_request
 
diff --git a/docs/en/reference/models/fastsam/model.md b/docs/en/reference/models/fastsam/model.md
index 1c60705f6e..86d43933d5 100644
--- a/docs/en/reference/models/fastsam/model.md
+++ b/docs/en/reference/models/fastsam/model.md
@@ -9,7 +9,7 @@ keywords: FastSAM, Ultralytics, model interface, YOLO, deep learning, machine le
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/model.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/model.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/fastsam/model.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.fastsam.model.FastSAM
 
diff --git a/docs/en/reference/models/fastsam/predict.md b/docs/en/reference/models/fastsam/predict.md
index 89b2da0cfd..ba7cd48215 100644
--- a/docs/en/reference/models/fastsam/predict.md
+++ b/docs/en/reference/models/fastsam/predict.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, Fast SAM Predictor, YOLO, segmentation, prediction, AI mo
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/fastsam/predict.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.fastsam.predict.FastSAMPredictor
 
diff --git a/docs/en/reference/models/fastsam/prompt.md b/docs/en/reference/models/fastsam/prompt.md
index bfc8be0eb5..295b798e29 100644
--- a/docs/en/reference/models/fastsam/prompt.md
+++ b/docs/en/reference/models/fastsam/prompt.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, FastSAM, image annotation, image visualization, FastSAMPr
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/prompt.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/prompt.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/fastsam/prompt.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.fastsam.prompt.FastSAMPrompt
 
diff --git a/docs/en/reference/models/fastsam/utils.md b/docs/en/reference/models/fastsam/utils.md
index 0f223a7b31..43c5617c21 100644
--- a/docs/en/reference/models/fastsam/utils.md
+++ b/docs/en/reference/models/fastsam/utils.md
@@ -9,11 +9,11 @@ keywords: FastSAM, bounding boxes, IoU, Ultralytics, image processing, computer
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/fastsam/utils.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.fastsam.utils.adjust_bboxes_to_image_border
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.fastsam.utils.bbox_iou
 
diff --git a/docs/en/reference/models/fastsam/val.md b/docs/en/reference/models/fastsam/val.md
index b6eb03250c..e37aa7442e 100644
--- a/docs/en/reference/models/fastsam/val.md
+++ b/docs/en/reference/models/fastsam/val.md
@@ -9,7 +9,7 @@ keywords: FastSAM Validator, Ultralytics, YOLO, segmentation, validation, metric
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/fastsam/val.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.fastsam.val.FastSAMValidator
 
diff --git a/docs/en/reference/models/nas/model.md b/docs/en/reference/models/nas/model.md
index 8de3c9117f..a7f71960b8 100644
--- a/docs/en/reference/models/nas/model.md
+++ b/docs/en/reference/models/nas/model.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, YOLO-NAS, object detection, pre-trained models, mac
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/nas/model.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/nas/model.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/nas/model.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.nas.model.NAS
 
diff --git a/docs/en/reference/models/nas/predict.md b/docs/en/reference/models/nas/predict.md
index 2ce3efd0ff..476402528d 100644
--- a/docs/en/reference/models/nas/predict.md
+++ b/docs/en/reference/models/nas/predict.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, NASPredictor, object detection, machine learning, A
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/nas/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/nas/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/nas/predict.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.nas.predict.NASPredictor
 
diff --git a/docs/en/reference/models/nas/val.md b/docs/en/reference/models/nas/val.md
index 39a5c15675..1a3b7da3c2 100644
--- a/docs/en/reference/models/nas/val.md
+++ b/docs/en/reference/models/nas/val.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, NASValidator, object detection, non-maximum suppres
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/nas/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/nas/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/nas/val.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.nas.val.NASValidator
 
diff --git a/docs/en/reference/models/rtdetr/model.md b/docs/en/reference/models/rtdetr/model.md
index b6bf9042ea..620eaf7ffe 100644
--- a/docs/en/reference/models/rtdetr/model.md
+++ b/docs/en/reference/models/rtdetr/model.md
@@ -9,7 +9,7 @@ keywords: RT-DETR, real-time object detection, Vision Transformer, Ultralytics,
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/model.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/model.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/rtdetr/model.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.rtdetr.model.RTDETR
 
diff --git a/docs/en/reference/models/rtdetr/predict.md b/docs/en/reference/models/rtdetr/predict.md
index 06980b6079..8786346ebf 100644
--- a/docs/en/reference/models/rtdetr/predict.md
+++ b/docs/en/reference/models/rtdetr/predict.md
@@ -9,7 +9,7 @@ keywords: RTDETRPredictor, Ultralytics, Real-Time Detection Transformer, object
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/rtdetr/predict.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.rtdetr.predict.RTDETRPredictor
 
diff --git a/docs/en/reference/models/rtdetr/train.md b/docs/en/reference/models/rtdetr/train.md
index 5210254dcc..52dbb174a4 100644
--- a/docs/en/reference/models/rtdetr/train.md
+++ b/docs/en/reference/models/rtdetr/train.md
@@ -9,7 +9,7 @@ keywords: RTDETRTrainer, real-time object detection, Vision Transformers, YOLO,
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/train.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/train.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/rtdetr/train.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.rtdetr.train.RTDETRTrainer
 
diff --git a/docs/en/reference/models/rtdetr/val.md b/docs/en/reference/models/rtdetr/val.md
index f4ee247071..525b300dd2 100644
--- a/docs/en/reference/models/rtdetr/val.md
+++ b/docs/en/reference/models/rtdetr/val.md
@@ -9,11 +9,11 @@ keywords: RTDETR, Ultralytics, object detection, tracking, YOLO, RTDETRDataset,
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/rtdetr/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/rtdetr/val.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.rtdetr.val.RTDETRDataset
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.rtdetr.val.RTDETRValidator
 
diff --git a/docs/en/reference/models/sam/amg.md b/docs/en/reference/models/sam/amg.md
index c20818d279..8c16850427 100644
--- a/docs/en/reference/models/sam/amg.md
+++ b/docs/en/reference/models/sam/amg.md
@@ -9,47 +9,47 @@ keywords: Ultralytics, SAM, AMG, API Reference, models, mask stability, crop box
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/amg.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/amg.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/amg.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.sam.amg.is_box_near_crop_edge
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.amg.batch_iterator
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.amg.calculate_stability_score
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.amg.build_point_grid
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.amg.build_all_layer_point_grids
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.amg.generate_crop_boxes
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.amg.uncrop_boxes_xyxy
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.amg.uncrop_points
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.amg.uncrop_masks
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.amg.remove_small_regions
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.amg.batched_mask_to_box
 
diff --git a/docs/en/reference/models/sam/build.md b/docs/en/reference/models/sam/build.md
index f5d25e86da..c5a66c656b 100644
--- a/docs/en/reference/models/sam/build.md
+++ b/docs/en/reference/models/sam/build.md
@@ -9,27 +9,27 @@ keywords: Ultralytics, SAM model, Segment Anything Model, SAM ViT, Mobile-SAM, m
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/build.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/build.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/build.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.sam.build.build_sam_vit_h
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.build.build_sam_vit_l
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.build.build_sam_vit_b
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.build.build_mobile_sam
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.build._build_sam
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.build.build_sam
 
diff --git a/docs/en/reference/models/sam/model.md b/docs/en/reference/models/sam/model.md
index 1d01cb3080..2255ad8829 100644
--- a/docs/en/reference/models/sam/model.md
+++ b/docs/en/reference/models/sam/model.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, SAM, Segment Anything Model, image segmentation, real-tim
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/model.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/model.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/model.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.sam.model.SAM
 
diff --git a/docs/en/reference/models/sam/modules/decoders.md b/docs/en/reference/models/sam/modules/decoders.md
index 4c7bceafa9..ff3aaa9e7a 100644
--- a/docs/en/reference/models/sam/modules/decoders.md
+++ b/docs/en/reference/models/sam/modules/decoders.md
@@ -9,11 +9,11 @@ keywords: Ultralytics, MaskDecoder, MLP, machine learning, transformer architect
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/decoders.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/decoders.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/modules/decoders.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.sam.modules.decoders.MaskDecoder
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.decoders.MLP
 
diff --git a/docs/en/reference/models/sam/modules/encoders.md b/docs/en/reference/models/sam/modules/encoders.md
index f8be7f1b02..4e839e4794 100644
--- a/docs/en/reference/models/sam/modules/encoders.md
+++ b/docs/en/reference/models/sam/modules/encoders.md
@@ -9,43 +9,43 @@ keywords: Ultralytics, SAM encoder, ImageEncoderViT, PromptEncoder, PositionEmbe
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/encoders.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/encoders.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/modules/encoders.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.sam.modules.encoders.ImageEncoderViT
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.encoders.PromptEncoder
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.encoders.PositionEmbeddingRandom
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.encoders.Block
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.encoders.Attention
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.encoders.PatchEmbed
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.encoders.window_partition
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.encoders.window_unpartition
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.encoders.get_rel_pos
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.encoders.add_decomposed_rel_pos
 
diff --git a/docs/en/reference/models/sam/modules/sam.md b/docs/en/reference/models/sam/modules/sam.md
index d0cefde4f1..5a7c30e42b 100644
--- a/docs/en/reference/models/sam/modules/sam.md
+++ b/docs/en/reference/models/sam/modules/sam.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, Sam Module, object segmentation, image encoder, mask deco
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/sam.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/sam.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/modules/sam.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.sam.modules.sam.Sam
 
diff --git a/docs/en/reference/models/sam/modules/tiny_encoder.md b/docs/en/reference/models/sam/modules/tiny_encoder.md
index dcbed97043..a5cac0529a 100644
--- a/docs/en/reference/models/sam/modules/tiny_encoder.md
+++ b/docs/en/reference/models/sam/modules/tiny_encoder.md
@@ -9,47 +9,47 @@ keywords: Ultralytics, TinyViT, Conv2d_BN, PatchEmbed, MBConv, Attention, PyTorc
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/tiny_encoder.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/tiny_encoder.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/modules/tiny_encoder.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.sam.modules.tiny_encoder.Conv2d_BN
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.tiny_encoder.PatchEmbed
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.tiny_encoder.MBConv
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.tiny_encoder.PatchMerging
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.tiny_encoder.ConvLayer
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.tiny_encoder.Mlp
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.tiny_encoder.Attention
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.tiny_encoder.TinyViTBlock
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.tiny_encoder.BasicLayer
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.tiny_encoder.LayerNorm2d
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.tiny_encoder.TinyViT
 
diff --git a/docs/en/reference/models/sam/modules/transformer.md b/docs/en/reference/models/sam/modules/transformer.md
index 8c258bfca8..ba190a43e3 100644
--- a/docs/en/reference/models/sam/modules/transformer.md
+++ b/docs/en/reference/models/sam/modules/transformer.md
@@ -9,15 +9,15 @@ keywords: Ultralytics, TwoWayTransformer, module, deep learning, transformer, ob
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/transformer.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/modules/transformer.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/modules/transformer.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.sam.modules.transformer.TwoWayTransformer
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.transformer.TwoWayAttentionBlock
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.sam.modules.transformer.Attention
 
diff --git a/docs/en/reference/models/sam/predict.md b/docs/en/reference/models/sam/predict.md
index 84d1ba8893..6c35fdc005 100644
--- a/docs/en/reference/models/sam/predict.md
+++ b/docs/en/reference/models/sam/predict.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, SAM, Segment Anything Model, image segmentation, real-tim
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/sam/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/sam/predict.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.sam.predict.Predictor
 
diff --git a/docs/en/reference/models/utils/loss.md b/docs/en/reference/models/utils/loss.md
index 827803bfb8..8092d71a92 100644
--- a/docs/en/reference/models/utils/loss.md
+++ b/docs/en/reference/models/utils/loss.md
@@ -9,11 +9,11 @@ keywords: ultralytics, YOLO, DETR, RT-DETR, loss functions, object detection, de
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/utils/loss.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/utils/loss.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/utils/loss.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.utils.loss.DETRLoss
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.utils.loss.RTDETRDetectionLoss
 
diff --git a/docs/en/reference/models/utils/ops.md b/docs/en/reference/models/utils/ops.md
index 61fc29270b..2aa12614e5 100644
--- a/docs/en/reference/models/utils/ops.md
+++ b/docs/en/reference/models/utils/ops.md
@@ -9,11 +9,11 @@ keywords: Ultralytics, models, utils, operations, HungarianMatcher, get_cdn_grou
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/utils/ops.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/utils/ops.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/utils/ops.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.utils.ops.HungarianMatcher
 
-<br><br>
+<br><br><hr><br>
 
 ## ::: ultralytics.models.utils.ops.get_cdn_group
 
diff --git a/docs/en/reference/models/yolo/classify/predict.md b/docs/en/reference/models/yolo/classify/predict.md
index 8e802dc0e9..2f53cfcbcf 100644
--- a/docs/en/reference/models/yolo/classify/predict.md
+++ b/docs/en/reference/models/yolo/classify/predict.md
@@ -9,7 +9,7 @@ keywords: YOLO, ClassificationPredictor, Ultralytics, model prediction, preproce
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/classify/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/classify/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/classify/predict.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.yolo.classify.predict.ClassificationPredictor
 
diff --git a/docs/en/reference/models/yolo/classify/train.md b/docs/en/reference/models/yolo/classify/train.md
index 9bd3b80447..c2da7eecf6 100644
--- a/docs/en/reference/models/yolo/classify/train.md
+++ b/docs/en/reference/models/yolo/classify/train.md
@@ -9,7 +9,7 @@ keywords: YOLO, Ultralytics, classification, training, machine learning, deep le
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/classify/train.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/classify/train.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/classify/train.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.yolo.classify.train.ClassificationTrainer
 
diff --git a/docs/en/reference/models/yolo/classify/val.md b/docs/en/reference/models/yolo/classify/val.md
index b8c04c3327..5afd20fb1a 100644
--- a/docs/en/reference/models/yolo/classify/val.md
+++ b/docs/en/reference/models/yolo/classify/val.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, classification, validation, ClassifyMetrics, Confus
 
     This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/classify/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/classify/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/classify/val.py) 🛠️. Thank you 🙏!
 
-<br><br>
+<br>
 
 ## ::: ultralytics.models.yolo.classify.val.ClassificationValidator
 

+
## ::: ultralytics.models.yolo.detect.predict.DetectionPredictor diff --git a/docs/en/reference/models/yolo/detect/train.md b/docs/en/reference/models/yolo/detect/train.md index 081a35862d..906449b2fe 100644 --- a/docs/en/reference/models/yolo/detect/train.md +++ b/docs/en/reference/models/yolo/detect/train.md @@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, DetectionTrainer, training, object detection, machi This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/detect/train.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/detect/train.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/detect/train.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.models.yolo.detect.train.DetectionTrainer diff --git a/docs/en/reference/models/yolo/detect/val.md b/docs/en/reference/models/yolo/detect/val.md index 8cf8b218cc..38a235512c 100644 --- a/docs/en/reference/models/yolo/detect/val.md +++ b/docs/en/reference/models/yolo/detect/val.md @@ -9,7 +9,7 @@ keywords: YOLO validation, detection validation, YOLO metrics, Ultralytics, obje This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/detect/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/detect/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/detect/val.py) 🛠️. Thank you 🙏! -

+
 ## ::: ultralytics.models.yolo.detect.val.DetectionValidator

diff --git a/docs/en/reference/models/yolo/model.md b/docs/en/reference/models/yolo/model.md
index 71a6b3e815..712a86e5f7 100644
--- a/docs/en/reference/models/yolo/model.md
+++ b/docs/en/reference/models/yolo/model.md
@@ -9,11 +9,11 @@ keywords: YOLO, object detection, Ultralytics, YOLO model, machine learning, Pyt

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/model.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/model.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/model.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.model.YOLO

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.model.YOLOWorld

diff --git a/docs/en/reference/models/yolo/obb/predict.md b/docs/en/reference/models/yolo/obb/predict.md
index a3914cd6c7..350d12e0d1 100644
--- a/docs/en/reference/models/yolo/obb/predict.md
+++ b/docs/en/reference/models/yolo/obb/predict.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, OBBPredictor, oriented bounding box, object detecti

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/obb/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/obb/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/obb/predict.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.obb.predict.OBBPredictor

diff --git a/docs/en/reference/models/yolo/obb/train.md b/docs/en/reference/models/yolo/obb/train.md
index 6f45aecb52..f0f30fa09c 100644
--- a/docs/en/reference/models/yolo/obb/train.md
+++ b/docs/en/reference/models/yolo/obb/train.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, OBB Trainer, Oriented Bounding Box, Machine Learnin

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/obb/train.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/obb/train.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/obb/train.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.obb.train.OBBTrainer

diff --git a/docs/en/reference/models/yolo/obb/val.md b/docs/en/reference/models/yolo/obb/val.md
index c0022b0bc0..6be5bd9559 100644
--- a/docs/en/reference/models/yolo/obb/val.md
+++ b/docs/en/reference/models/yolo/obb/val.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, OBBValidator, Oriented Bounding Boxes, DetectionVal

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/obb/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/obb/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/obb/val.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.obb.val.OBBValidator

diff --git a/docs/en/reference/models/yolo/pose/predict.md b/docs/en/reference/models/yolo/pose/predict.md
index dc0323498c..961f03fd57 100644
--- a/docs/en/reference/models/yolo/pose/predict.md
+++ b/docs/en/reference/models/yolo/pose/predict.md
@@ -9,7 +9,7 @@ keywords: YOLO, Pose Prediction, Ultralytics, PosePredictor, YOLOv8, Machine Lea

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/pose/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/pose/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/pose/predict.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.pose.predict.PosePredictor

diff --git a/docs/en/reference/models/yolo/pose/train.md b/docs/en/reference/models/yolo/pose/train.md
index 695f9126d3..1aed5d02cd 100644
--- a/docs/en/reference/models/yolo/pose/train.md
+++ b/docs/en/reference/models/yolo/pose/train.md
@@ -9,7 +9,7 @@ keywords: PoseTrainer, YOLO, Ultralytics, pose models, training, model configura

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/pose/train.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/pose/train.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/pose/train.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.pose.train.PoseTrainer

diff --git a/docs/en/reference/models/yolo/pose/val.md b/docs/en/reference/models/yolo/pose/val.md
index 133b51555c..fd4166d97b 100644
--- a/docs/en/reference/models/yolo/pose/val.md
+++ b/docs/en/reference/models/yolo/pose/val.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, PoseValidator, pose validation, machine learning, o

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/pose/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/pose/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/pose/val.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.pose.val.PoseValidator

diff --git a/docs/en/reference/models/yolo/segment/predict.md b/docs/en/reference/models/yolo/segment/predict.md
index f30d5fbcdf..f075b89da6 100644
--- a/docs/en/reference/models/yolo/segment/predict.md
+++ b/docs/en/reference/models/yolo/segment/predict.md
@@ -9,7 +9,7 @@ keywords: YOLO, SegmentationPredictor, machine learning, computer vision, object

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/segment/predict.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/segment/predict.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/segment/predict.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.segment.predict.SegmentationPredictor

diff --git a/docs/en/reference/models/yolo/segment/train.md b/docs/en/reference/models/yolo/segment/train.md
index 342eeeec13..844b19cad7 100644
--- a/docs/en/reference/models/yolo/segment/train.md
+++ b/docs/en/reference/models/yolo/segment/train.md
@@ -9,7 +9,7 @@ keywords: YOLO, segmentation, train, Ultralytics, SegmentationTrainer, Python, m

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/segment/train.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/segment/train.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/segment/train.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.segment.train.SegmentationTrainer

diff --git a/docs/en/reference/models/yolo/segment/val.md b/docs/en/reference/models/yolo/segment/val.md
index e630939da6..37c7672570 100644
--- a/docs/en/reference/models/yolo/segment/val.md
+++ b/docs/en/reference/models/yolo/segment/val.md
@@ -9,7 +9,7 @@ keywords: YOLO, segmentation, validator, Ultralytics, model validation, machine

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/segment/val.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/segment/val.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/segment/val.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.segment.val.SegmentationValidator

diff --git a/docs/en/reference/models/yolo/world/train.md b/docs/en/reference/models/yolo/world/train.md
index 63624fce98..d92cb60b5d 100644
--- a/docs/en/reference/models/yolo/world/train.md
+++ b/docs/en/reference/models/yolo/world/train.md
@@ -9,11 +9,11 @@ keywords: Ultralytics, YOLO, World Model, training, deep learning, computer visi

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/world/train.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/world/train.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/world/train.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.world.train.WorldTrainer

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.world.train.on_pretrain_routine_end

diff --git a/docs/en/reference/models/yolo/world/train_world.md b/docs/en/reference/models/yolo/world/train_world.md
index 27e0ce778b..06dd62356b 100644
--- a/docs/en/reference/models/yolo/world/train_world.md
+++ b/docs/en/reference/models/yolo/world/train_world.md
@@ -9,7 +9,7 @@ keywords: YOLO, WorldTrainer, open-set datasets, training, evaluation, build dat

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/world/train_world.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/yolo/world/train_world.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/models/yolo/world/train_world.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.models.yolo.world.train_world.WorldTrainerFromScratch

diff --git a/docs/en/reference/nn/autobackend.md b/docs/en/reference/nn/autobackend.md
index 3e8c2f7a22..b24fb5956e 100644
--- a/docs/en/reference/nn/autobackend.md
+++ b/docs/en/reference/nn/autobackend.md
@@ -9,15 +9,15 @@ keywords: Ultralytics, AutoBackend, check_class_names, YOLO, YOLO models, optimi

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/autobackend.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/autobackend.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/autobackend.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.autobackend.AutoBackend

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.autobackend.check_class_names

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.autobackend.default_class_names

diff --git a/docs/en/reference/nn/modules/block.md b/docs/en/reference/nn/modules/block.md
index 0521ccb543..160355712b 100644
--- a/docs/en/reference/nn/modules/block.md
+++ b/docs/en/reference/nn/modules/block.md
@@ -9,159 +9,159 @@ keywords: Ultralytics, YOLO, neural networks, block modules, DFL, Proto, HGStem,

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/block.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/block.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/modules/block.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.DFL

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.Proto

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.HGStem

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.HGBlock

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.SPP

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.SPPF

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.C1

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.C2

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.C2f

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.C3

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.C3x

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.RepC3

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.C3TR

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.C3Ghost

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.GhostBottleneck

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.Bottleneck

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.BottleneckCSP

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.ResNetBlock

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.ResNetLayer

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.MaxSigmoidAttnBlock

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.C2fAttn

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.ImagePoolingAttn

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.ContrastiveHead

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.BNContrastiveHead

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.RepBottleneck

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.RepCSP

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.RepNCSPELAN4

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.ELAN1

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.AConv

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.ADown

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.SPPELAN

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.CBLinear

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.CBFuse

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.RepVGGDW

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.CIB

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.C2fCIB

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.Attention

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.PSA

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.block.SCDown

diff --git a/docs/en/reference/nn/modules/conv.md b/docs/en/reference/nn/modules/conv.md
index 501a0b5a4b..5912614758 100644
--- a/docs/en/reference/nn/modules/conv.md
+++ b/docs/en/reference/nn/modules/conv.md
@@ -9,59 +9,59 @@ keywords: Ultralytics, convolution modules, Conv, LightConv, GhostConv, YOLO, de

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/conv.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/conv.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/modules/conv.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.Conv

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.Conv2

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.LightConv

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.DWConv

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.DWConvTranspose2d

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.ConvTranspose

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.Focus

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.GhostConv

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.RepConv

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.ChannelAttention

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.SpatialAttention

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.CBAM

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.Concat

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.conv.autopad

diff --git a/docs/en/reference/nn/modules/head.md b/docs/en/reference/nn/modules/head.md
index 21e994f4d8..310f8b9141 100644
--- a/docs/en/reference/nn/modules/head.md
+++ b/docs/en/reference/nn/modules/head.md
@@ -9,35 +9,35 @@ keywords: Ultralytics, YOLO, Detection, Pose, RTDETRDecoder, nn modules, guides

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/head.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/head.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/modules/head.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.head.Detect

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.head.Segment

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.head.OBB

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.head.Pose

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.head.Classify

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.head.WorldDetect

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.head.RTDETRDecoder

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.head.v10Detect

diff --git a/docs/en/reference/nn/modules/transformer.md b/docs/en/reference/nn/modules/transformer.md
index 91904be542..6e38675b20 100644
--- a/docs/en/reference/nn/modules/transformer.md
+++ b/docs/en/reference/nn/modules/transformer.md
@@ -9,43 +9,43 @@ keywords: Ultralytics, Ultralytics documentation, TransformerEncoderLayer, Trans

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/transformer.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/transformer.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/modules/transformer.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.transformer.TransformerEncoderLayer

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.transformer.AIFI

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.transformer.TransformerLayer

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.transformer.TransformerBlock

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.transformer.MLPBlock

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.transformer.MLP

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.transformer.LayerNorm2d

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.transformer.MSDeformAttn

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.transformer.DeformableTransformerDecoderLayer

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.transformer.DeformableTransformerDecoder

diff --git a/docs/en/reference/nn/modules/utils.md b/docs/en/reference/nn/modules/utils.md
index eb17c2df73..3c747359b9 100644
--- a/docs/en/reference/nn/modules/utils.md
+++ b/docs/en/reference/nn/modules/utils.md
@@ -9,23 +9,23 @@ keywords: Ultralytics, PyTorch, utils, initialization, inverse sigmoid, multisca

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/modules/utils.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.utils._get_clones

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.utils.bias_init_with_prob

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.utils.linear_init

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.utils.inverse_sigmoid

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.modules.utils.multi_scale_deformable_attn_pytorch

diff --git a/docs/en/reference/nn/tasks.md b/docs/en/reference/nn/tasks.md
index 66d2e0e1bc..e528a09f71 100644
--- a/docs/en/reference/nn/tasks.md
+++ b/docs/en/reference/nn/tasks.md
@@ -9,71 +9,71 @@ keywords: Ultralytics, YOLO, nn tasks, DetectionModel, PoseModel, RTDETRDetectio

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/tasks.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/tasks.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/tasks.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.BaseModel

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.DetectionModel

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.OBBModel

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.SegmentationModel

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.PoseModel

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.ClassificationModel

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.RTDETRDetectionModel

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.WorldModel

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.Ensemble

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.temporary_modules

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.torch_safe_load

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.attempt_load_weights

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.attempt_load_one_weight

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.parse_model

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.yaml_model_load

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.guess_model_scale

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.nn.tasks.guess_model_task

diff --git a/docs/en/reference/solutions/ai_gym.md b/docs/en/reference/solutions/ai_gym.md
index 963a7ee9e8..e8126ae481 100644
--- a/docs/en/reference/solutions/ai_gym.md
+++ b/docs/en/reference/solutions/ai_gym.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, AI Gym, YOLO, pose detection, gym step counting, real-tim

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/ai_gym.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/ai_gym.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/ai_gym.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.solutions.ai_gym.AIGym

diff --git a/docs/en/reference/solutions/analytics.md b/docs/en/reference/solutions/analytics.md
index 5fc2791360..b6ab9b9939 100644
--- a/docs/en/reference/solutions/analytics.md
+++ b/docs/en/reference/solutions/analytics.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, Analytics, Python, visual analytics, line chart, bar char

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/analytics.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/analytics.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/analytics.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.solutions.analytics.Analytics

diff --git a/docs/en/reference/solutions/distance_calculation.md b/docs/en/reference/solutions/distance_calculation.md
index d663fd7358..ec104e6cce 100644
--- a/docs/en/reference/solutions/distance_calculation.md
+++ b/docs/en/reference/solutions/distance_calculation.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, distance calculation, object tracking, real-time video, c

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/distance_calculation.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/distance_calculation.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/distance_calculation.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.solutions.distance_calculation.DistanceCalculation

diff --git a/docs/en/reference/solutions/heatmap.md b/docs/en/reference/solutions/heatmap.md
index 4acdc9fdc2..ae3ec53934 100644
--- a/docs/en/reference/solutions/heatmap.md
+++ b/docs/en/reference/solutions/heatmap.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, Heatmap, Python, Real-time Video, Object Tracking, cv2, S

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/heatmap.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/heatmap.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/heatmap.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.solutions.heatmap.Heatmap

diff --git a/docs/en/reference/solutions/object_counter.md b/docs/en/reference/solutions/object_counter.md
index 0520f3b5b3..ff7c81d34d 100644
--- a/docs/en/reference/solutions/object_counter.md
+++ b/docs/en/reference/solutions/object_counter.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, Object Counter, Real-time Tracking, Video Stream, Python,

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/object_counter.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/object_counter.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/object_counter.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.solutions.object_counter.ObjectCounter

diff --git a/docs/en/reference/solutions/parking_management.md b/docs/en/reference/solutions/parking_management.md
index 18ddfa11f7..112b46aabd 100644
--- a/docs/en/reference/solutions/parking_management.md
+++ b/docs/en/reference/solutions/parking_management.md
@@ -9,11 +9,11 @@ keywords: Ultralytics, YOLO, parking management, computer vision, parking monito

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/parking_management.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/parking_management.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/parking_management.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.solutions.parking_management.ParkingPtsSelection

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.solutions.parking_management.ParkingManagement

diff --git a/docs/en/reference/solutions/queue_management.md b/docs/en/reference/solutions/queue_management.md
index b4ff393031..3214ef2177 100644
--- a/docs/en/reference/solutions/queue_management.md
+++ b/docs/en/reference/solutions/queue_management.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, queue management, object tracking, real-time video, Pytho

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/queue_management.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/queue_management.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/queue_management.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.solutions.queue_management.QueueManager

diff --git a/docs/en/reference/solutions/speed_estimation.md b/docs/en/reference/solutions/speed_estimation.md
index ce28e73c7c..d21b13bed1 100644
--- a/docs/en/reference/solutions/speed_estimation.md
+++ b/docs/en/reference/solutions/speed_estimation.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, speed estimation, YOLO, real-time tracking, object tracki

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/speed_estimation.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/speed_estimation.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/speed_estimation.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.solutions.speed_estimation.SpeedEstimator

diff --git a/docs/en/reference/solutions/streamlit_inference.md b/docs/en/reference/solutions/streamlit_inference.md
index f31e477154..a89d709580 100644
--- a/docs/en/reference/solutions/streamlit_inference.md
+++ b/docs/en/reference/solutions/streamlit_inference.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, YOLOv8, live inference, real-time object detection, Strea

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/streamlit_inference.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/streamlit_inference.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/streamlit_inference.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.solutions.streamlit_inference.inference

diff --git a/docs/en/reference/trackers/basetrack.md b/docs/en/reference/trackers/basetrack.md
index 3a70cb13d7..aa5966bd8b 100644
--- a/docs/en/reference/trackers/basetrack.md
+++ b/docs/en/reference/trackers/basetrack.md
@@ -9,11 +9,11 @@ keywords: Ultralytics, YOLO, object tracking, BaseTrack, TrackState, tracking me

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/basetrack.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/basetrack.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/basetrack.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.basetrack.TrackState

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.basetrack.BaseTrack

diff --git a/docs/en/reference/trackers/bot_sort.md b/docs/en/reference/trackers/bot_sort.md
index eb0d68f194..f817afefea 100644
--- a/docs/en/reference/trackers/bot_sort.md
+++ b/docs/en/reference/trackers/bot_sort.md
@@ -9,11 +9,11 @@ keywords: Ultralytics, Bot SORT, BOTrack, BOTSORT, YOLOv8, object tracking, Kalm

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/bot_sort.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/bot_sort.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/bot_sort.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.bot_sort.BOTrack

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.bot_sort.BOTSORT

diff --git a/docs/en/reference/trackers/byte_tracker.md b/docs/en/reference/trackers/byte_tracker.md
index 434dc6e87b..85eebe3d72 100644
--- a/docs/en/reference/trackers/byte_tracker.md
+++ b/docs/en/reference/trackers/byte_tracker.md
@@ -9,11 +9,11 @@ keywords: Ultralytics, BYTETracker, object tracking, Kalman filter, YOLOv8, docu

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/byte_tracker.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/byte_tracker.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/byte_tracker.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.byte_tracker.STrack

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.byte_tracker.BYTETracker

diff --git a/docs/en/reference/trackers/track.md b/docs/en/reference/trackers/track.md
index 39a60f4b07..a98f04e09c 100644
--- a/docs/en/reference/trackers/track.md
+++ b/docs/en/reference/trackers/track.md
@@ -9,15 +9,15 @@ keywords: Ultralytics, YOLO, object tracking, track.py, on_predict_start, on_pre

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/track.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/track.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/track.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.track.on_predict_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.track.on_predict_postprocess_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.track.register_tracker

diff --git a/docs/en/reference/trackers/utils/gmc.md b/docs/en/reference/trackers/utils/gmc.md
index fd34d868e0..382c542395 100644
--- a/docs/en/reference/trackers/utils/gmc.md
+++ b/docs/en/reference/trackers/utils/gmc.md
@@ -9,7 +9,7 @@ keywords: GMC, Generalized Motion Compensation, Ultralytics, tracking, object de

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/gmc.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/gmc.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/utils/gmc.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.utils.gmc.GMC

diff --git a/docs/en/reference/trackers/utils/kalman_filter.md b/docs/en/reference/trackers/utils/kalman_filter.md
index 752c8767d1..b45351c463 100644
--- a/docs/en/reference/trackers/utils/kalman_filter.md
+++ b/docs/en/reference/trackers/utils/kalman_filter.md
@@ -9,11 +9,11 @@ keywords: Kalman Filter, Object Tracking, Python, Ultralytics, YOLO, Bounding Bo

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/kalman_filter.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/kalman_filter.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/utils/kalman_filter.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.utils.kalman_filter.KalmanFilterXYAH

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.utils.kalman_filter.KalmanFilterXYWH

diff --git a/docs/en/reference/trackers/utils/matching.md b/docs/en/reference/trackers/utils/matching.md
index d8e9bd4d35..d929ae4bf5 100644
--- a/docs/en/reference/trackers/utils/matching.md
+++ b/docs/en/reference/trackers/utils/matching.md
@@ -9,19 +9,19 @@ keywords: Ultralytics, matching utils, linear assignment, IoU distance, embeddin

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/matching.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/trackers/utils/matching.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/trackers/utils/matching.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.utils.matching.linear_assignment

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.utils.matching.iou_distance

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.utils.matching.embedding_distance

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.trackers.utils.matching.fuse_score

diff --git a/docs/en/reference/utils/__init__.md b/docs/en/reference/utils/__init__.md
index 269437ef57..264f2e504d 100644
--- a/docs/en/reference/utils/__init__.md
+++ b/docs/en/reference/utils/__init__.md
@@ -9,163 +9,163 @@ keywords: Ultralytics, utils, TQDM, Python, ML, Machine Learning utilities, YOLO

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/\_\_init\_\_.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/__init__.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/__init__.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.TQDM

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.SimpleClass

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.IterableSimpleNamespace

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.ThreadingLocked

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.TryExcept

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.Retry

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.SettingsManager

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.plt_settings

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.set_logging

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.emojis

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.yaml_save

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.yaml_load

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.yaml_print

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.read_device_model

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_ubuntu

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_colab

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_kaggle

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_jupyter

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_docker

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_raspberrypi

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_jetson

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_online

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_pip_package

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_dir_writeable

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_pytest_running

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_github_action_running

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.get_git_dir

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.is_git_dir

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.get_git_origin_url

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.get_git_branch

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.get_default_args

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.get_ubuntu_version

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.get_user_config_dir

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.colorstr

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.remove_colorstr

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.threaded

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.set_sentry

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.deprecation_warn

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.clean_url

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.url2file

diff --git a/docs/en/reference/utils/autobatch.md b/docs/en/reference/utils/autobatch.md
index c42e121a9f..df9ecbf758 100644
--- a/docs/en/reference/utils/autobatch.md
+++ b/docs/en/reference/utils/autobatch.md
@@ -9,11 +9,11 @@ keywords: YOLO batch size, CUDA memory, PyTorch autobatch, Ultralytics, machine

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/autobatch.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/autobatch.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/autobatch.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.autobatch.check_train_batch_size

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.autobatch.autobatch

diff --git a/docs/en/reference/utils/benchmarks.md b/docs/en/reference/utils/benchmarks.md
index 50ab51a280..0833a9bdd7 100644
--- a/docs/en/reference/utils/benchmarks.md
+++ b/docs/en/reference/utils/benchmarks.md
@@ -9,15 +9,15 @@ keywords: YOLO, model benchmarking, ONNX, TensorRT, PyTorch, TensorFlow, CoreML,

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/benchmarks.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/benchmarks.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/benchmarks.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.benchmarks.RF100Benchmark

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.benchmarks.ProfileModels

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.benchmarks.benchmark

diff --git a/docs/en/reference/utils/callbacks/base.md b/docs/en/reference/utils/callbacks/base.md
index ca49be8d78..53e1b0892d 100644
--- a/docs/en/reference/utils/callbacks/base.md
+++ b/docs/en/reference/utils/callbacks/base.md
@@ -9,111 +9,111 @@ keywords: Ultralytics, base callbacks, training, validation, prediction, model e

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/base.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/base.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/base.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_pretrain_routine_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_pretrain_routine_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_train_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_train_epoch_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_train_batch_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.optimizer_step

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_before_zero_grad

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_train_batch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_train_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_fit_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_model_save

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_train_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_params_update

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.teardown

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_val_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_val_batch_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_val_batch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_val_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_predict_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_predict_batch_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_predict_batch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_predict_postprocess_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_predict_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_export_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.on_export_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.get_default_callbacks

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.base.add_integration_callbacks

diff --git a/docs/en/reference/utils/callbacks/clearml.md b/docs/en/reference/utils/callbacks/clearml.md
index 8cb749ac7a..a1fe2a3183 100644
--- a/docs/en/reference/utils/callbacks/clearml.md
+++ b/docs/en/reference/utils/callbacks/clearml.md
@@ -9,31 +9,31 @@ keywords: Ultralytics, YOLO, ClearML, integration, callbacks, pretraining, train

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/clearml.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/clearml.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/clearml.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.clearml._log_debug_samples

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.clearml._log_plot

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.clearml.on_pretrain_routine_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.clearml.on_train_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.clearml.on_fit_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.clearml.on_val_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.clearml.on_train_end

diff --git a/docs/en/reference/utils/callbacks/comet.md b/docs/en/reference/utils/callbacks/comet.md
index d6fb342c9c..b73eb8d5d5 100644
--- a/docs/en/reference/utils/callbacks/comet.md
+++ b/docs/en/reference/utils/callbacks/comet.md
@@ -9,99 +9,99 @@ keywords: Ultralytics, YOLO, Comet, callbacks, logging, machine learning, monito

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/comet.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/comet.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/comet.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._get_comet_mode

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._get_comet_model_name

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._get_eval_batch_logging_interval

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._get_max_image_predictions_to_log

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._scale_confidence_score

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._should_log_confusion_matrix

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._should_log_image_predictions

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._get_experiment_type

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._create_experiment

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._fetch_trainer_metadata

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._scale_bounding_box_to_original_image_shape

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._format_ground_truth_annotations_for_detection

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._format_prediction_annotations_for_detection

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._fetch_annotations

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._create_prediction_metadata_map

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._log_confusion_matrix

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._log_images

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._log_image_predictions

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._log_plots

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet._log_model

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet.on_pretrain_routine_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet.on_train_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet.on_fit_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.comet.on_train_end

diff --git a/docs/en/reference/utils/callbacks/dvc.md b/docs/en/reference/utils/callbacks/dvc.md
index 04c86e63b4..6d4083e4cd 100644
--- a/docs/en/reference/utils/callbacks/dvc.md
+++ b/docs/en/reference/utils/callbacks/dvc.md
@@ -9,39 +9,39 @@ keywords: Ultralytics, DVC, DVCLive, machine learning, logging, training, callba

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/dvc.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/dvc.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/dvc.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.dvc._log_images

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.dvc._log_plots

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.dvc._log_confusion_matrix

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.dvc.on_pretrain_routine_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.dvc.on_pretrain_routine_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.dvc.on_train_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.dvc.on_train_epoch_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.dvc.on_fit_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.dvc.on_train_end

diff --git a/docs/en/reference/utils/callbacks/hub.md b/docs/en/reference/utils/callbacks/hub.md
index d7e24bf245..799384f4f9 100644
--- a/docs/en/reference/utils/callbacks/hub.md
+++ b/docs/en/reference/utils/callbacks/hub.md
@@ -9,39 +9,39 @@ keywords: Ultralytics, callbacks, pretrain, model save, train start, train end,

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/hub.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/hub.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/hub.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.hub.on_pretrain_routine_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.hub.on_pretrain_routine_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.hub.on_fit_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.hub.on_model_save

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.hub.on_train_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.hub.on_train_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.hub.on_val_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.hub.on_predict_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.hub.on_export_start

diff --git a/docs/en/reference/utils/callbacks/mlflow.md b/docs/en/reference/utils/callbacks/mlflow.md
index 0e4dc065b5..897520a64a 100644
--- a/docs/en/reference/utils/callbacks/mlflow.md
+++ b/docs/en/reference/utils/callbacks/mlflow.md
@@ -9,23 +9,23 @@ keywords: MLflow, Ultralytics YOLO, logging, metrics, parameters, model artifact

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/mlflow.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/mlflow.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/mlflow.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.mlflow.sanitize_dict

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.mlflow.on_pretrain_routine_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.mlflow.on_train_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.mlflow.on_fit_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.mlflow.on_train_end

diff --git a/docs/en/reference/utils/callbacks/neptune.md b/docs/en/reference/utils/callbacks/neptune.md
index 3788906d74..caeba558b3 100644
--- a/docs/en/reference/utils/callbacks/neptune.md
+++ b/docs/en/reference/utils/callbacks/neptune.md
@@ -9,35 +9,35 @@ keywords: Ultralytics, NeptuneAI, YOLO, experiment logging, machine learning, AI

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/neptune.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/neptune.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/neptune.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.neptune._log_scalars

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.neptune._log_images

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.neptune._log_plot

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.neptune.on_pretrain_routine_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.neptune.on_train_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.neptune.on_fit_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.neptune.on_val_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.neptune.on_train_end

diff --git a/docs/en/reference/utils/callbacks/raytune.md b/docs/en/reference/utils/callbacks/raytune.md
index e27423d07e..7e26df01c9 100644
--- a/docs/en/reference/utils/callbacks/raytune.md
+++ b/docs/en/reference/utils/callbacks/raytune.md
@@ -9,7 +9,7 @@ keywords: Ultralytics, Ray Tune, hyperparameter tuning, YOLO, machine learning,

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/raytune.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/raytune.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/raytune.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.raytune.on_fit_epoch_end

diff --git a/docs/en/reference/utils/callbacks/tensorboard.md b/docs/en/reference/utils/callbacks/tensorboard.md
index 91477813eb..ba6cb64b0a 100644
--- a/docs/en/reference/utils/callbacks/tensorboard.md
+++ b/docs/en/reference/utils/callbacks/tensorboard.md
@@ -9,27 +9,27 @@ keywords: Ultralytics, TensorBoard, callbacks, machine learning, training visual

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/tensorboard.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/tensorboard.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/tensorboard.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.tensorboard._log_scalars

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.tensorboard._log_tensorboard_graph

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.tensorboard.on_pretrain_routine_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.tensorboard.on_train_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.tensorboard.on_train_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.tensorboard.on_fit_epoch_end

diff --git a/docs/en/reference/utils/callbacks/wb.md b/docs/en/reference/utils/callbacks/wb.md
index e2afdf7313..11b5547a47 100644
--- a/docs/en/reference/utils/callbacks/wb.md
+++ b/docs/en/reference/utils/callbacks/wb.md
@@ -9,31 +9,31 @@ keywords: Ultralytics, YOLO, WandB, callbacks, logging, metrics, visualizations,

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/wb.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/callbacks/wb.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/callbacks/wb.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.wb._custom_table

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.wb._plot_curve

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.wb._log_plots

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.wb.on_pretrain_routine_start

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.wb.on_fit_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.wb.on_train_epoch_end

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.callbacks.wb.on_train_end

diff --git a/docs/en/reference/utils/checks.md b/docs/en/reference/utils/checks.md
index b837360bac..f37f589003 100644
--- a/docs/en/reference/utils/checks.md
+++ b/docs/en/reference/utils/checks.md
@@ -9,103 +9,103 @@ keywords: Ultralytics, YOLO, utility functions, version checks, requirements, im

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/checks.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/checks.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/checks.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.parse_requirements

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.parse_version

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.is_ascii

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_imgsz

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_version

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_latest_pypi_version

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_pip_update_available

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_font

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_python

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_requirements

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_torchvision

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_suffix

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_yolov5u_filename

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_model_file_from_stem

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_file

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_yaml

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_is_path_safe

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_imshow

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_yolo

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.collect_system_info

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.check_amp

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.git_describe

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.print_args

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.cuda_device_count

-<br><br>
+<br><br><hr><br>

 ## ::: ultralytics.utils.checks.cuda_is_available

diff --git a/docs/en/reference/utils/dist.md b/docs/en/reference/utils/dist.md
index 2d168ea6dc..18930484ef 100644
--- a/docs/en/reference/utils/dist.md
+++ b/docs/en/reference/utils/dist.md
@@ -9,19 +9,19 @@ keywords: Ultralytics, distributed training, DDP, multi-node training, network p

 This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/dist.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/dist.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/dist.py) 🛠️. Thank you 🙏!

-<br><br>
+<br><br><hr><br>

## ::: ultralytics.utils.dist.find_free_network_port -

+



## ::: ultralytics.utils.dist.generate_ddp_file -

+



## ::: ultralytics.utils.dist.generate_ddp_command -

+



## ::: ultralytics.utils.dist.ddp_cleanup diff --git a/docs/en/reference/utils/downloads.md b/docs/en/reference/utils/downloads.md index 663fffe4b5..e04abd1427 100644 --- a/docs/en/reference/utils/downloads.md +++ b/docs/en/reference/utils/downloads.md @@ -9,43 +9,43 @@ keywords: Ultralytics, download utilities, URL validation, zip directory, unzip This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/downloads.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/downloads.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/downloads.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.downloads.is_url -

+



## ::: ultralytics.utils.downloads.delete_dsstore -

+



## ::: ultralytics.utils.downloads.zip_directory -

+



## ::: ultralytics.utils.downloads.unzip_file -

+



## ::: ultralytics.utils.downloads.check_disk_space -

+



## ::: ultralytics.utils.downloads.get_google_drive_file_info -

+



## ::: ultralytics.utils.downloads.safe_download -

+



## ::: ultralytics.utils.downloads.get_github_assets -

+



## ::: ultralytics.utils.downloads.attempt_download_asset -

+



## ::: ultralytics.utils.downloads.download diff --git a/docs/en/reference/utils/errors.md b/docs/en/reference/utils/errors.md index abab81de7f..87b02cc5f1 100644 --- a/docs/en/reference/utils/errors.md +++ b/docs/en/reference/utils/errors.md @@ -9,7 +9,7 @@ keywords: Ultralytics, YOLO, error handling, HUBModelError, model fetching, cust This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/errors.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/errors.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/errors.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.errors.HUBModelError diff --git a/docs/en/reference/utils/files.md b/docs/en/reference/utils/files.md index 612cb883b2..1971245405 100644 --- a/docs/en/reference/utils/files.md +++ b/docs/en/reference/utils/files.md @@ -9,35 +9,35 @@ keywords: Ultralytics, file utilities, Python, WorkingDirectory, increment_path, This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/files.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/files.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/files.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.files.WorkingDirectory -

+



## ::: ultralytics.utils.files.spaces_in_path -

+



## ::: ultralytics.utils.files.increment_path -

+



## ::: ultralytics.utils.files.file_age -

+



## ::: ultralytics.utils.files.file_date -

+



## ::: ultralytics.utils.files.file_size -

+



## ::: ultralytics.utils.files.get_latest_run -

+



## ::: ultralytics.utils.files.update_models diff --git a/docs/en/reference/utils/instance.md b/docs/en/reference/utils/instance.md index 4fa0b08085..12909cfd63 100644 --- a/docs/en/reference/utils/instance.md +++ b/docs/en/reference/utils/instance.md @@ -9,15 +9,15 @@ keywords: Ultralytics, bounding boxes, Instances, bbox formats, conversions, AI, This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/instance.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/instance.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/instance.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.instance.Bboxes -

+



## ::: ultralytics.utils.instance.Instances -

+



## ::: ultralytics.utils.instance._ntuple diff --git a/docs/en/reference/utils/loss.md b/docs/en/reference/utils/loss.md index 79755c1a87..a2f6ece7ce 100644 --- a/docs/en/reference/utils/loss.md +++ b/docs/en/reference/utils/loss.md @@ -9,51 +9,51 @@ keywords: Ultralytics, loss functions, Varifocal Loss, Focal Loss, Bbox Loss, Ro This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/loss.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/loss.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/loss.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.loss.VarifocalLoss -

+



## ::: ultralytics.utils.loss.FocalLoss -

+



## ::: ultralytics.utils.loss.DFLoss -

+



## ::: ultralytics.utils.loss.BboxLoss -

+



## ::: ultralytics.utils.loss.RotatedBboxLoss -

+



## ::: ultralytics.utils.loss.KeypointLoss -

+



## ::: ultralytics.utils.loss.v8DetectionLoss -

+



## ::: ultralytics.utils.loss.v8SegmentationLoss -

+



## ::: ultralytics.utils.loss.v8PoseLoss -

+



## ::: ultralytics.utils.loss.v8ClassificationLoss -

+



## ::: ultralytics.utils.loss.v8OBBLoss -

+



## ::: ultralytics.utils.loss.E2EDetectLoss diff --git a/docs/en/reference/utils/metrics.md b/docs/en/reference/utils/metrics.md index 65f688752b..634636478c 100644 --- a/docs/en/reference/utils/metrics.md +++ b/docs/en/reference/utils/metrics.md @@ -9,87 +9,87 @@ keywords: Ultralytics, metrics, model validation, performance analysis, IoU, con This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/metrics.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/metrics.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/metrics.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.metrics.ConfusionMatrix -

+



## ::: ultralytics.utils.metrics.Metric -

+



## ::: ultralytics.utils.metrics.DetMetrics -

+



## ::: ultralytics.utils.metrics.SegmentMetrics -

+



## ::: ultralytics.utils.metrics.PoseMetrics -

+



## ::: ultralytics.utils.metrics.ClassifyMetrics -

+



## ::: ultralytics.utils.metrics.OBBMetrics -

+



## ::: ultralytics.utils.metrics.bbox_ioa -

+



## ::: ultralytics.utils.metrics.box_iou -

+



## ::: ultralytics.utils.metrics.bbox_iou -

+



## ::: ultralytics.utils.metrics.mask_iou -

+



## ::: ultralytics.utils.metrics.kpt_iou -

+



## ::: ultralytics.utils.metrics._get_covariance_matrix -

+



## ::: ultralytics.utils.metrics.probiou -

+



## ::: ultralytics.utils.metrics.batch_probiou -

+



## ::: ultralytics.utils.metrics.smooth_BCE -

+



## ::: ultralytics.utils.metrics.smooth -

+



## ::: ultralytics.utils.metrics.plot_pr_curve -

+



## ::: ultralytics.utils.metrics.plot_mc_curve -

+



## ::: ultralytics.utils.metrics.compute_ap -

+



## ::: ultralytics.utils.metrics.ap_per_class diff --git a/docs/en/reference/utils/ops.md b/docs/en/reference/utils/ops.md index 0d7028c3fa..2992ce8dcd 100644 --- a/docs/en/reference/utils/ops.md +++ b/docs/en/reference/utils/ops.md @@ -9,123 +9,123 @@ keywords: Ultralytics, utility operations, non-max suppression, bounding box tra This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/ops.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.ops.Profile -

+



## ::: ultralytics.utils.ops.segment2box -

+



## ::: ultralytics.utils.ops.scale_boxes -

+



## ::: ultralytics.utils.ops.make_divisible -

+



## ::: ultralytics.utils.ops.nms_rotated -

+



## ::: ultralytics.utils.ops.non_max_suppression -

+



## ::: ultralytics.utils.ops.clip_boxes -

+



## ::: ultralytics.utils.ops.clip_coords -

+



## ::: ultralytics.utils.ops.scale_image -

+



## ::: ultralytics.utils.ops.xyxy2xywh -

+



## ::: ultralytics.utils.ops.xywh2xyxy -

+



## ::: ultralytics.utils.ops.xywhn2xyxy -

+



## ::: ultralytics.utils.ops.xyxy2xywhn -

+



## ::: ultralytics.utils.ops.xywh2ltwh -

+



## ::: ultralytics.utils.ops.xyxy2ltwh -

+



## ::: ultralytics.utils.ops.ltwh2xywh -

+



## ::: ultralytics.utils.ops.xyxyxyxy2xywhr -

+



## ::: ultralytics.utils.ops.xywhr2xyxyxyxy -

+



## ::: ultralytics.utils.ops.ltwh2xyxy -

+



## ::: ultralytics.utils.ops.segments2boxes -

+



## ::: ultralytics.utils.ops.resample_segments -

+



## ::: ultralytics.utils.ops.crop_mask -

+



## ::: ultralytics.utils.ops.process_mask -

+



## ::: ultralytics.utils.ops.process_mask_native -

+



## ::: ultralytics.utils.ops.scale_masks -

+



## ::: ultralytics.utils.ops.scale_coords -

+



## ::: ultralytics.utils.ops.regularize_rboxes -

+



## ::: ultralytics.utils.ops.masks2segments -

+



## ::: ultralytics.utils.ops.convert_torch2numpy_batch -

+



## ::: ultralytics.utils.ops.clean_str diff --git a/docs/en/reference/utils/patches.md b/docs/en/reference/utils/patches.md index 5d2858d342..444a274237 100644 --- a/docs/en/reference/utils/patches.md +++ b/docs/en/reference/utils/patches.md @@ -9,19 +9,19 @@ keywords: Ultralytics, utils, patches, imread, imwrite, imshow, torch_save, Open This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/patches.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/patches.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/patches.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.patches.imread -

+



## ::: ultralytics.utils.patches.imwrite -

+



## ::: ultralytics.utils.patches.imshow -

+



## ::: ultralytics.utils.patches.torch_save diff --git a/docs/en/reference/utils/plotting.md b/docs/en/reference/utils/plotting.md index 5a6ee716d2..239ec4d52f 100644 --- a/docs/en/reference/utils/plotting.md +++ b/docs/en/reference/utils/plotting.md @@ -9,47 +9,47 @@ keywords: ultralytics, plotting, utilities, documentation, data visualization, a This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/plotting.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/plotting.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/plotting.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.plotting.Colors -

+



## ::: ultralytics.utils.plotting.Annotator -

+



## ::: ultralytics.utils.plotting.plot_labels -

+



## ::: ultralytics.utils.plotting.save_one_box -

+



## ::: ultralytics.utils.plotting.plot_images -

+



## ::: ultralytics.utils.plotting.plot_results -

+



## ::: ultralytics.utils.plotting.plt_color_scatter -

+



## ::: ultralytics.utils.plotting.plot_tune_results -

+



## ::: ultralytics.utils.plotting.output_to_target -

+



## ::: ultralytics.utils.plotting.output_to_rotated_target -

+



## ::: ultralytics.utils.plotting.feature_visualization diff --git a/docs/en/reference/utils/tal.md b/docs/en/reference/utils/tal.md index 8c23339bab..588182a239 100644 --- a/docs/en/reference/utils/tal.md +++ b/docs/en/reference/utils/tal.md @@ -9,27 +9,27 @@ keywords: Ultralytics, YOLO, TaskAlignedAssigner, object detection, machine lear This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/tal.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/tal.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/tal.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.tal.TaskAlignedAssigner -

+



## ::: ultralytics.utils.tal.RotatedTaskAlignedAssigner -

+



## ::: ultralytics.utils.tal.make_anchors -

+



## ::: ultralytics.utils.tal.dist2bbox -

+



## ::: ultralytics.utils.tal.bbox2dist -

+



## ::: ultralytics.utils.tal.dist2rbox diff --git a/docs/en/reference/utils/torch_utils.md b/docs/en/reference/utils/torch_utils.md index 011a0effff..6a48fec741 100644 --- a/docs/en/reference/utils/torch_utils.md +++ b/docs/en/reference/utils/torch_utils.md @@ -9,115 +9,115 @@ keywords: Ultralytics, torch utils, model optimization, device selection, infere This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/torch_utils.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/torch_utils.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/torch_utils.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.torch_utils.ModelEMA -

+



## ::: ultralytics.utils.torch_utils.EarlyStopping -

+



## ::: ultralytics.utils.torch_utils.torch_distributed_zero_first -

+



## ::: ultralytics.utils.torch_utils.smart_inference_mode -

+



## ::: ultralytics.utils.torch_utils.get_cpu_info -

+



## ::: ultralytics.utils.torch_utils.select_device -

+



## ::: ultralytics.utils.torch_utils.time_sync -

+



## ::: ultralytics.utils.torch_utils.fuse_conv_and_bn -

+



## ::: ultralytics.utils.torch_utils.fuse_deconv_and_bn -

+



## ::: ultralytics.utils.torch_utils.model_info -

+



## ::: ultralytics.utils.torch_utils.get_num_params -

+



## ::: ultralytics.utils.torch_utils.get_num_gradients -

+



## ::: ultralytics.utils.torch_utils.model_info_for_loggers -

+



## ::: ultralytics.utils.torch_utils.get_flops -

+



## ::: ultralytics.utils.torch_utils.get_flops_with_torch_profiler -

+



## ::: ultralytics.utils.torch_utils.initialize_weights -

+



## ::: ultralytics.utils.torch_utils.scale_img -

+



## ::: ultralytics.utils.torch_utils.make_divisible -

+



## ::: ultralytics.utils.torch_utils.copy_attr -

+



## ::: ultralytics.utils.torch_utils.get_latest_opset -

+



## ::: ultralytics.utils.torch_utils.intersect_dicts -

+



## ::: ultralytics.utils.torch_utils.is_parallel -

+



## ::: ultralytics.utils.torch_utils.de_parallel -

+



## ::: ultralytics.utils.torch_utils.one_cycle -

+



## ::: ultralytics.utils.torch_utils.init_seeds -

+



## ::: ultralytics.utils.torch_utils.strip_optimizer -

+



## ::: ultralytics.utils.torch_utils.convert_optimizer_state_dict_to_fp16 -

+



## ::: ultralytics.utils.torch_utils.profile diff --git a/docs/en/reference/utils/triton.md b/docs/en/reference/utils/triton.md index abddb03bef..2dcd749cf3 100644 --- a/docs/en/reference/utils/triton.md +++ b/docs/en/reference/utils/triton.md @@ -9,7 +9,7 @@ keywords: Ultralytics, TritonRemoteModel, Triton Inference Server, model client, This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/triton.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/triton.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/triton.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.triton.TritonRemoteModel diff --git a/docs/en/reference/utils/tuner.md b/docs/en/reference/utils/tuner.md index 52ac7afe5d..a58eba00d4 100644 --- a/docs/en/reference/utils/tuner.md +++ b/docs/en/reference/utils/tuner.md @@ -9,7 +9,7 @@ keywords: Ultralytics, tuner, hyperparameter tuning, Ray Tune, YOLO, machine lea This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/tuner.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/tuner.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/utils/tuner.py) 🛠️. Thank you 🙏! -

+
## ::: ultralytics.utils.tuner.run_ray_tune diff --git a/examples/YOLOv8-SAHI-Inference-Video/readme.md b/examples/YOLOv8-SAHI-Inference-Video/readme.md index f24df303a9..525aca5ac0 100644 --- a/examples/YOLOv8-SAHI-Inference-Video/readme.md +++ b/examples/YOLOv8-SAHI-Inference-Video/readme.md @@ -46,7 +46,7 @@ python yolov8_sahi.py --source "path/to/video.mp4" --save-img --weights "yolov8n **1. What is SAHI?** -SAHI stands for Slicing, Analysis, and Healing of Images. It is a library designed to optimize object detection algorithms for large-scale and high-resolution images. The library source code is available on [GitHub](https://github.com/obss/sahi). +SAHI stands for Slicing Aided Hyper Inference. It is a library designed to optimize object detection algorithms for large-scale and high-resolution images. The library source code is available on [GitHub](https://github.com/obss/sahi). **2. Why use SAHI with YOLOv8?** diff --git a/mkdocs.yml b/mkdocs.yml index be1be24fdb..e3f38ce13d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -620,6 +620,12 @@ plugins: docstring_style: google show_root_heading: true show_source: true + separate_signature: true + line_length: 80 + show_signature_annotations: true + show_symbol_type_heading: true # insiders + show_symbol_type_toc: true # insiders + show_inheritance_diagram: true # insiders - ultralytics: add_desc: False add_image: True diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index d624e0a1c0..c7c73c06ed 100644 --- a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -187,11 +187,11 @@ CFG_BOOL_KEYS = { # boolean-only arguments def cfg2dict(cfg): """ - Convert a configuration object to a dictionary, whether it is a file path, a string, or a SimpleNamespace object. + Converts a configuration object to a dictionary. Args: - cfg (str | Path | Dict | SimpleNamespace): Configuration object to be converted to a dictionary. This may be a - path to a configuration file, a dictionary, or a SimpleNamespace object. + cfg (str | Path | Dict | SimpleNamespace): Configuration object to be converted. Can be a file path, + a string, a dictionary, or a SimpleNamespace object. Returns: (Dict): Configuration object in dictionary format. @@ -209,8 +209,9 @@ def cfg2dict(cfg): >>> config_dict = cfg2dict({'param1': 'value1', 'param2': 'value2'}) Notes: - - If `cfg` is a path or a string, it will be loaded as YAML and converted to a dictionary. - - If `cfg` is a SimpleNamespace object, it will be converted to a dictionary using `vars()`. + - If cfg is a path or string, it's loaded as YAML and converted to a dictionary. + - If cfg is a SimpleNamespace object, it's converted to a dictionary using vars(). + - If cfg is already a dictionary, it's returned unchanged. """ if isinstance(cfg, (str, Path)): cfg = yaml_load(cfg) # load dict @@ -224,24 +225,23 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove Load and merge configuration data from a file or dictionary, with optional overrides. Args: - cfg (str | Path | Dict | SimpleNamespace): Configuration data source. + cfg (str | Path | Dict | SimpleNamespace): Configuration data source. Can be a file path, dictionary, or + SimpleNamespace object. overrides (Dict | None): Dictionary containing key-value pairs to override the base configuration. Returns: - (SimpleNamespace): Namespace containing the merged training arguments. - - Notes: - - If both `cfg` and `overrides` are provided, the values in `overrides` will take precedence. 
- - Special handling ensures alignment and correctness of the configuration, such as converting numeric `project` - and `name` to strings and validating configuration keys and values. + (SimpleNamespace): Namespace containing the merged configuration arguments. Examples: - Load default configuration: >>> from ultralytics import get_cfg - >>> config = get_cfg() - - Load from a custom file with overrides: + >>> config = get_cfg() # Load default configuration >>> config = get_cfg('path/to/config.yaml', overrides={'epochs': 50, 'batch_size': 16}) + + Notes: + - If both `cfg` and `overrides` are provided, the values in `overrides` will take precedence. + - Special handling ensures alignment and correctness of the configuration, such as converting numeric + `project` and `name` to strings and validating configuration keys and values. + - The function performs type and value checks on the configuration data. """ cfg = cfg2dict(cfg) @@ -270,24 +270,31 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove def check_cfg(cfg, hard=True): """ - Checks configuration argument types and values for the Ultralytics library, ensuring correctness and converting them - if necessary. + Checks configuration argument types and values for the Ultralytics library. + + This function validates the types and values of configuration arguments, ensuring correctness and converting + them if necessary. It checks for specific key types defined in global variables such as CFG_FLOAT_KEYS, + CFG_FRACTION_KEYS, CFG_INT_KEYS, and CFG_BOOL_KEYS. Args: cfg (Dict): Configuration dictionary to validate. hard (bool): If True, raises exceptions for invalid types and values; if False, attempts to convert them. Examples: - Validate a configuration with a mix of valid and invalid values: >>> config = { - ... 'epochs': 50, # valid integer - ... 'lr0': 0.01, # valid float - ... 'momentum': 1.2, # invalid float (out of 0.0-1.0 range) - ... 'save': 'true', # invalid bool + ... 'epochs': 50, # valid integer + ... 'lr0': 0.01, # valid float + ... 'momentum': 1.2, # invalid float (out of 0.0-1.0 range) + ... 'save': 'true', # invalid bool ... } >>> check_cfg(config, hard=False) >>> print(config) - {'epochs': 50, 'lr0': 0.01, 'momentum': 1.2, 'save': False} # corrected 'save' key and retained other values + {'epochs': 50, 'lr0': 0.01, 'momentum': 1.2, 'save': False} # corrected 'save' key + + Notes: + - The function modifies the input dictionary in-place. + - None values are ignored as they may be from optional arguments. + - Fraction keys are checked to be within the range [0.0, 1.0]. """ for k, v in cfg.items(): if v is not None: # None values may be from optional args @@ -328,16 +335,15 @@ def get_save_dir(args, name=None): Returns the directory path for saving outputs, derived from arguments or default settings. Args: - args (SimpleNamespace): Namespace object containing configurations such as 'project', 'name', 'task', 'mode', and - 'save_dir'. - name (str | None): Optional name for the output directory. If not provided, it defaults to 'args.name' or the - 'args.mode'. + args (SimpleNamespace): Namespace object containing configurations such as 'project', 'name', 'task', + 'mode', and 'save_dir'. + name (str | None): Optional name for the output directory. If not provided, it defaults to 'args.name' + or the 'args.mode'. Returns: (Path): Directory path where outputs should be saved. 
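To make the save-directory behavior concrete, here is a minimal sketch of the incrementing run-name scheme that `get_save_dir` relies on. This is an illustrative stand-in written for this review, assuming behavior in the spirit of `ultralytics.utils.files.increment_path`; it is not the library's implementation, and `increment_name_sketch` is a hypothetical helper.

```python
from pathlib import Path


def increment_name_sketch(path: str, exist_ok: bool = False) -> Path:
    # If 'runs/detect/train' already exists, fall back to 'train2', 'train3', ...
    p = Path(path)
    if exist_ok or not p.exists():
        return p
    for n in range(2, 10000):
        candidate = p.with_name(f"{p.name}{n}")
        if not candidate.exists():
            return candidate
    raise FileExistsError(f"Could not find a free name for {path}")


print(increment_name_sketch("runs/detect/train"))  # runs/detect/train if that dir is absent
```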
Examples: - Generate a save directory using provided arguments >>> from types import SimpleNamespace >>> args = SimpleNamespace(project='my_project', task='detect', mode='train', exist_ok=True) >>> save_dir = get_save_dir(args) @@ -369,6 +375,11 @@ def _handle_deprecation(custom): >>> _handle_deprecation(custom_config) >>> print(custom_config) {'show_boxes': True, 'show_labels': True, 'line_width': 2} + + Notes: + This function modifies the input dictionary in-place, replacing deprecated keys with their current + equivalents. It also handles value conversions where necessary, such as inverting boolean values for + 'hide_labels' and 'hide_conf'. """ for key in custom.copy().keys(): @@ -390,32 +401,29 @@ def _handle_deprecation(custom): def check_dict_alignment(base: Dict, custom: Dict, e=None): """ - Check for key alignment between custom and base configuration dictionaries, handling deprecated keys and providing - informative error messages for mismatched keys. + Checks alignment between custom and base configuration dictionaries, handling deprecated keys and providing error + messages for mismatched keys. Args: base (Dict): The base configuration dictionary containing valid keys. custom (Dict): The custom configuration dictionary to be checked for alignment. - e (Exception | None): Optional error instance passed by the calling function. Default is None. + e (Exception | None): Optional error instance passed by the calling function. Raises: - SystemExit: Terminates the program execution if mismatched keys are found. - - Notes: - - The function suggests corrections for mismatched keys based on similarity to valid keys. - - Deprecated keys in the custom configuration are automatically replaced with their updated equivalents. - - Detailed error messages are printed for each mismatched key to help users identify and correct their custom - configurations. + SystemExit: If mismatched keys are found between the custom and base dictionaries. Examples: >>> base_cfg = {'epochs': 50, 'lr0': 0.01, 'batch_size': 16} >>> custom_cfg = {'epoch': 100, 'lr': 0.02, 'batch_size': 32} - >>> try: ... check_dict_alignment(base_cfg, custom_cfg) ... except SystemExit: - ... # Handle the error or correct the configuration - ... pass + ... print("Mismatched keys found") + + Notes: + - Suggests corrections for mismatched keys based on similarity to valid keys. + - Automatically replaces deprecated keys in the custom configuration with updated equivalents. + - Prints detailed error messages for each mismatched key to help users correct their configurations. """ custom = _handle_deprecation(custom) base_keys, custom_keys = (set(x.keys()) for x in (base, custom)) @@ -434,7 +442,10 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None): def merge_equals_args(args: List[str]) -> List[str]: """ - Merges arguments around isolated '=' in a list of strings. + Merges arguments around isolated '=' in a list of strings, handling three cases: + 1. ['arg', '=', 'val'] becomes ['arg=val'], + 2. ['arg=', 'val'] becomes ['arg=val'], + 3. ['arg', '=val'] becomes ['arg=val']. Args: args (List[str]): A list of strings where each element represents an argument. @@ -443,20 +454,9 @@ def merge_equals_args(args: List[str]) -> List[str]: (List[str]): A list of strings where the arguments around isolated '=' are merged. 
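The three merge cases enumerated above condense into a short illustrative re-implementation. This is a sketch of the documented contract, not the library's exact code, and `merge_equals_args_sketch` is a hypothetical name.

```python
from typing import List


def merge_equals_args_sketch(args: List[str]) -> List[str]:
    # Handles the three documented cases:
    #   ['arg', '=', 'val'], ['arg=', 'val'], ['arg', '=val']  ->  ['arg=val']
    new_args, i = [], 0
    while i < len(args):
        arg = args[i]
        if arg == "=" and new_args and i + 1 < len(args):  # isolated '='
            new_args[-1] += "=" + args[i + 1]
            i += 2
        elif arg.endswith("=") and i + 1 < len(args) and "=" not in args[i + 1]:  # trailing '='
            new_args.append(arg + args[i + 1])
            i += 2
        elif arg.startswith("=") and new_args:  # leading '='
            new_args[-1] += arg
            i += 1
        else:
            new_args.append(arg)
            i += 1
    return new_args


print(merge_equals_args_sketch(["arg1", "=", "value", "arg2=", "value2", "arg3", "=value3"]))
# ['arg1=value', 'arg2=value2', 'arg3=value3']
```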
Examples: - Merge arguments where equals sign is separated: - >>> args = ["arg1", "=", "value"] - >>> merge_equals_args(args) - ["arg1=value"] - - Merge arguments where equals sign is at the end of the first argument: - >>> args = ["arg1=", "value"] + >>> args = ["arg1", "=", "value", "arg2=", "value2", "arg3", "=value3"] >>> merge_equals_args(args) - ["arg1=value"] - - Merge arguments where equals sign is at the beginning of the second argument: - >>> args = ["arg1", "=value"] - >>> merge_equals_args(args) - ["arg1=value"] + ['arg1=value', 'arg2=value2', 'arg3=value3'] """ new_args = [] for i, arg in enumerate(args): @@ -475,18 +475,24 @@ def merge_equals_args(args: List[str]) -> List[str]: def handle_yolo_hub(args: List[str]) -> None: """ - Handle Ultralytics HUB command-line interface (CLI) commands. + Handles Ultralytics HUB command-line interface (CLI) commands for authentication. This function processes Ultralytics HUB CLI commands such as login and logout. It should be called when executing a script with arguments related to HUB authentication. Args: - args (List[str]): A list of command line arguments. + args (List[str]): A list of command line arguments. The first argument should be either 'login' + or 'logout'. For 'login', an optional second argument can be the API key. Examples: ```bash yolo hub login YOUR_API_KEY ``` + + Notes: + - The function imports the 'hub' module from ultralytics to perform login and logout operations. + - For the 'login' command, if no API key is provided, an empty string is passed to the login function. + - The 'logout' command does not require any additional arguments. """ from ultralytics import hub @@ -501,21 +507,26 @@ def handle_yolo_hub(args: List[str]) -> None: def handle_yolo_settings(args: List[str]) -> None: """ - Handle YOLO settings command-line interface (CLI) commands. + Handles YOLO settings command-line interface (CLI) commands. - This function processes YOLO settings CLI commands such as reset. It should be called when executing a script with - arguments related to YOLO settings management. + This function processes YOLO settings CLI commands such as reset and updating individual settings. It should be + called when executing a script with arguments related to YOLO settings management. Args: args (List[str]): A list of command line arguments for YOLO settings management. Examples: - Reset YOLO settings: - >>> yolo settings reset + >>> handle_yolo_settings(["reset"]) # Reset YOLO settings + >>> handle_yolo_settings(["default_cfg_path=yolov8n.yaml"]) # Update a specific setting Notes: - For more information on handling YOLO settings, visit: - https://docs.ultralytics.com/quickstart/#ultralytics-settings + - If no arguments are provided, the function will display the current settings. + - The 'reset' command will delete the existing settings file and create new default settings. + - Other arguments are treated as key-value pairs to update specific settings. + - The function will check for alignment between the provided settings and the existing ones. + - After processing, the updated settings will be displayed. + - For more information on handling YOLO settings, visit: + https://docs.ultralytics.com/quickstart/#ultralytics-settings """ url = "https://docs.ultralytics.com/quickstart/#ultralytics-settings" # help URL try: @@ -539,12 +550,17 @@ def handle_explorer(): """ Open the Ultralytics Explorer GUI for dataset exploration and analysis. 
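The settings flow documented above (reset vs. key=value updates) can be sketched as a small dispatcher. Everything here is hypothetical: `DEFAULT_SETTINGS` stands in for the real settings store, and the error handling is simplified relative to the `check_dict_alignment` validation the actual function performs.

```python
DEFAULT_SETTINGS = {"datasets_dir": "datasets", "runs_dir": "runs"}  # hypothetical defaults


def handle_settings_sketch(args, settings):
    # 'reset' restores defaults; otherwise each 'key=value' pair updates one setting.
    if not args:
        return settings  # no args: just show current settings
    if args[0] == "reset":
        return dict(DEFAULT_SETTINGS)
    for pair in args:
        k, v = pair.split("=", 1)
        if k not in settings:
            raise KeyError(f"'{k}' is not a valid setting")
        settings[k] = v
    return settings


print(handle_settings_sketch(["runs_dir=experiments"], dict(DEFAULT_SETTINGS)))
# {'datasets_dir': 'datasets', 'runs_dir': 'experiments'}
```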
- This function launches a graphical user interface that provides tools for interacting with and analyzing datasets - using the Ultralytics Explorer API. + This function launches a graphical user interface that provides tools for interacting with and analyzing + datasets using the Ultralytics Explorer API. It checks for the required 'streamlit' package and informs + the user that the Explorer dashboard is loading. Examples: - Start the Ultralytics Explorer: >>> handle_explorer() + + Notes: + - Requires 'streamlit' package version 1.29.0 or higher. + - The function does not take any arguments or return any values. + - It is typically called from the command line interface using the 'yolo explorer' command. """ checks.check_requirements("streamlit>=1.29.0") LOGGER.info("💡 Loading Explorer dashboard...") @@ -553,18 +569,18 @@ def handle_explorer(): def handle_streamlit_inference(): """ - Open the Ultralytics Live Inference streamlit app for real-time object detection. + Open the Ultralytics Live Inference Streamlit app for real-time object detection. This function initializes and runs a Streamlit application designed for performing live object detection using - Ultralytics models. - - References: - - Streamlit documentation: https://docs.streamlit.io/ - - Ultralytics: https://docs.ultralytics.com + Ultralytics models. It checks for the required Streamlit package and launches the app. Examples: - To run the live inference Streamlit app, execute: >>> handle_streamlit_inference() + + Notes: + - Requires Streamlit version 1.29.0 or higher. + - The app is launched using the 'streamlit run' command. + - The Streamlit app file is located in the Ultralytics package directory. """ checks.check_requirements("streamlit>=1.29.0") LOGGER.info("💡 Loading Ultralytics Live Inference app...") @@ -573,20 +589,32 @@ def handle_streamlit_inference(): def parse_key_value_pair(pair): """ - Parse a 'key=value' pair and return the key and value. + Parses a key-value pair string into separate key and value components. Args: - pair (str): The 'key=value' string to be parsed. + pair (str): A string containing a key-value pair in the format "key=value". Returns: - (tuple[str, str]): A tuple containing the key and value as separate strings. + (tuple): A tuple containing two elements: + - key (str): The parsed key. + - value (str): The parsed value. + + Raises: + AssertionError: If the value is missing or empty. Examples: >>> key, value = parse_key_value_pair("model=yolov8n.pt") - >>> key - 'model' - >>> value - 'yolov8n.pt + >>> print(f"Key: {key}, Value: {value}") + Key: model, Value: yolov8n.pt + + >>> key, value = parse_key_value_pair("epochs=100") + >>> print(f"Key: {key}, Value: {value}") + Key: epochs, Value: 100 + + Notes: + - The function splits the input string on the first '=' character. + - Leading and trailing whitespace is removed from both key and value. + - An assertion error is raised if the value is empty after stripping. """ k, v = pair.split("=", 1) # split on first '=' sign k, v = k.strip(), v.strip() # remove spaces @@ -596,17 +624,19 @@ def parse_key_value_pair(pair): def smart_value(v): """ - Convert a string representation of a value into its appropriate Python type (int, float, bool, None, etc.). + Converts a string representation of a value to its appropriate Python type. + + This function attempts to convert a given string into a Python object of the most appropriate type. It handles + conversions to None, bool, int, float, and other types that can be evaluated safely. 
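A minimal sketch of the string-to-type conversion described above, substituting `ast.literal_eval` for `eval()` to sidestep the untrusted-input concern the Notes mention; `smart_value_sketch` is a hypothetical name, not the library function.

```python
import ast


def smart_value_sketch(v: str):
    # Case-insensitive handling of None/bool, then safe literal evaluation.
    lower = v.lower()
    if lower == "none":
        return None
    if lower == "true":
        return True
    if lower == "false":
        return False
    try:
        return ast.literal_eval(v)  # handles int, float, tuples, lists, etc.
    except (ValueError, SyntaxError):
        return v  # fall back to the original string


for s in ("42", "3.14", "True", "None", "some_string"):
    print(repr(smart_value_sketch(s)))  # 42, 3.14, True, None, 'some_string'
```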
Args: - v (str): String representation of the value to be converted. + v (str): The string representation of the value to be converted. Returns: - (Any): The converted value, which can be of type int, float, bool, None, or the original string if no conversion + (Any): The converted value. The type can be None, bool, int, float, or the original string if no conversion is applicable. Examples: - Convert a string to various types: >>> smart_value("42") 42 >>> smart_value("3.14") @@ -617,6 +647,11 @@ def smart_value(v): None >>> smart_value("some_string") 'some_string' + + Notes: + - The function uses a case-insensitive comparison for boolean and None values. + - For other types, it attempts to use Python's eval() function, which can be unsafe if used with untrusted input. + - If no conversion is possible, the original string is returned. """ v_lower = v.lower() if v_lower == "none": @@ -639,7 +674,7 @@ def entrypoint(debug=""): executing the corresponding tasks such as training, validation, prediction, exporting models, and more. Args: - debug (str, optional): Space-separated string of command-line arguments for debugging purposes. + debug (str): Space-separated string of command-line arguments for debugging purposes. Examples: Train a detection model for 10 epochs with an initial learning_rate of 0.01: @@ -652,9 +687,9 @@ def entrypoint(debug=""): >>> entrypoint("val model=yolov8n.pt data=coco8.yaml batch=1 imgsz=640") Notes: - - For a list of all available commands and their arguments, see the provided help messages and the Ultralytics - documentation at https://docs.ultralytics.com. - If no arguments are passed, the function will display the usage help message. + - For a list of all available commands and their arguments, see the provided help messages and the + Ultralytics documentation at https://docs.ultralytics.com. """ args = (debug.split(" ") if debug else ARGV)[1:] if not args: # no arguments passed @@ -793,16 +828,24 @@ def entrypoint(debug=""): # Special modes -------------------------------------------------------------------------------------------------------- def copy_default_cfg(): """ - Copy and create a new default configuration file with '_copy' appended to its name, providing a usage example. + Copies the default configuration file and creates a new one with '_copy' appended to its name. - This function duplicates the existing default configuration file and appends '_copy' to its name in the current - working directory. + This function duplicates the existing default configuration file (DEFAULT_CFG_PATH) and saves it + with '_copy' appended to its name in the current working directory. It provides a convenient way + to create a custom configuration file based on the default settings. Examples: - Copy the default configuration file and use it in a YOLO command: >>> copy_default_cfg() - >>> # Example YOLO command with this new custom cfg: - >>> # yolo cfg='default_copy.yaml' imgsz=320 batch=8 + # Output: default.yaml copied to /path/to/current/directory/default_copy.yaml + # Example YOLO command with this new custom cfg: + # yolo cfg='/path/to/current/directory/default_copy.yaml' imgsz=320 batch=8 + + Notes: + - The new configuration file is created in the current working directory. + - After copying, the function prints a message with the new file's location and an example + YOLO command demonstrating how to use the new configuration file. + - This function is useful for users who want to modify the default configuration without + altering the original file. 
""" new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace(".yaml", "_copy.yaml") shutil.copy2(DEFAULT_CFG_PATH, new_file) diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py index 3a9802225c..958d1ac678 100644 --- a/ultralytics/data/augment.py +++ b/ultralytics/data/augment.py @@ -23,74 +23,252 @@ DEFAULT_STD = (1.0, 1.0, 1.0) DEFAULT_CROP_FRACTION = 1.0 -# TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic class BaseTransform: """ - Base class for image transformations. + Base class for image transformations in the Ultralytics library. - This is a generic transformation class that can be extended for specific image processing needs. - The class is designed to be compatible with both classification and semantic segmentation tasks. + This class serves as a foundation for implementing various image processing operations, designed to be + compatible with both classification and semantic segmentation tasks. Methods: - __init__: Initializes the BaseTransform object. - apply_image: Applies image transformation to labels. + apply_image: Applies image transformations to labels. apply_instances: Applies transformations to object instances in labels. apply_semantic: Applies semantic segmentation to an image. __call__: Applies all label transformations to an image, instances, and semantic masks. + + Examples: + >>> transform = BaseTransform() + >>> labels = {'image': np.array(...), 'instances': [...], 'semantic': np.array(...)} + >>> transformed_labels = transform(labels) """ def __init__(self) -> None: - """Initializes the BaseTransform object.""" + """ + Initializes the BaseTransform object. + + This constructor sets up the base transformation object, which can be extended for specific image + processing tasks. It is designed to be compatible with both classification and semantic segmentation. + + Examples: + >>> transform = BaseTransform() + """ pass def apply_image(self, labels): - """Applies image transformations to labels.""" + """ + Applies image transformations to labels. + + This method is intended to be overridden by subclasses to implement specific image transformation + logic. In its base form, it returns the input labels unchanged. + + Args: + labels (Any): The input labels to be transformed. The exact type and structure of labels may + vary depending on the specific implementation. + + Returns: + (Any): The transformed labels. In the base implementation, this is identical to the input. + + Examples: + >>> transform = BaseTransform() + >>> original_labels = [1, 2, 3] + >>> transformed_labels = transform.apply_image(original_labels) + >>> print(transformed_labels) + [1, 2, 3] + """ pass def apply_instances(self, labels): - """Applies transformations to object instances in labels.""" + """ + Applies transformations to object instances in labels. + + This method is responsible for applying various transformations to object instances within the given + labels. It is designed to be overridden by subclasses to implement specific instance transformation + logic. + + Args: + labels (Dict): A dictionary containing label information, including object instances. + + Returns: + (Dict): The modified labels dictionary with transformed object instances. 
+ + Examples: + >>> transform = BaseTransform() + >>> labels = {'instances': Instances(xyxy=torch.rand(5, 4), cls=torch.randint(0, 80, (5,)))} + >>> transformed_labels = transform.apply_instances(labels) + """ pass def apply_semantic(self, labels): - """Applies semantic segmentation to an image.""" + """ + Applies semantic segmentation transformations to an image. + + This method is intended to be overridden by subclasses to implement specific semantic segmentation + transformations. In its base form, it does not perform any operations. + + Args: + labels (Any): The input labels or semantic segmentation mask to be transformed. + + Returns: + (Any): The transformed semantic segmentation mask or labels. + + Examples: + >>> transform = BaseTransform() + >>> semantic_mask = np.zeros((100, 100), dtype=np.uint8) + >>> transformed_mask = transform.apply_semantic(semantic_mask) + """ pass def __call__(self, labels): - """Applies all label transformations to an image, instances, and semantic masks.""" + """ + Applies all label transformations to an image, instances, and semantic masks. + + This method orchestrates the application of various transformations defined in the BaseTransform class + to the input labels. It sequentially calls the apply_image, apply_instances, and apply_semantic methods + to process the image, object instances, and semantic masks, respectively. + + Args: + labels (Dict): A dictionary containing image data and annotations. Expected keys include 'img' for + the image data, and 'instances' for object instances. + + Returns: + (Dict): The input labels dictionary with transformed image and instances. + + Examples: + >>> transform = BaseTransform() + >>> labels = {'img': np.random.rand(640, 640, 3), 'instances': []} + >>> transformed_labels = transform(labels) + """ self.apply_image(labels) self.apply_instances(labels) self.apply_semantic(labels) class Compose: - """Class for composing multiple image transformations.""" + """ + A class for composing multiple image transformations. + + Attributes: + transforms (List[Callable]): A list of transformation functions to be applied sequentially. + + Methods: + __call__: Applies a series of transformations to input data. + append: Appends a new transform to the existing list of transforms. + insert: Inserts a new transform at a specified index in the list of transforms. + __getitem__: Retrieves a specific transform or a set of transforms using indexing. + __setitem__: Sets a specific transform or a set of transforms using indexing. + tolist: Converts the list of transforms to a standard Python list. + + Examples: + >>> transforms = [RandomFlip(), RandomRotate(30), RandomCrop((224, 224))] + >>> compose = Compose(transforms) + >>> transformed_data = compose(data) + >>> compose.append(Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])) + >>> compose.insert(0, Resize((256, 256))) + """ def __init__(self, transforms): - """Initializes the Compose object with a list of transforms.""" + """ + Initializes the Compose object with a list of transforms. + + Args: + transforms (List[Callable]): A list of callable transform objects to be applied sequentially. + + Examples: + >>> from ultralytics.data.augment import Compose, Resize, RandomFlip + >>> transforms = [Resize(640), RandomFlip()] + >>> compose = Compose(transforms) + """ self.transforms = transforms if isinstance(transforms, list) else [transforms] def __call__(self, data): - """Applies a series of transformations to input data.""" + """ + Applies a series of transformations to input data.
This method sequentially applies each transformation in the + Compose object's list of transforms to the input data. + + Args: + data (Any): The input data to be transformed. This can be of any type, depending on the + transformations in the list. + + Returns: + (Any): The transformed data after applying all transformations in sequence. + + Examples: + >>> transforms = [Transform1(), Transform2(), Transform3()] + >>> compose = Compose(transforms) + >>> transformed_data = compose(input_data) + """ for t in self.transforms: data = t(data) return data def append(self, transform): - """Appends a new transform to the existing list of transforms.""" + """ + Appends a new transform to the existing list of transforms. + + Args: + transform (BaseTransform): The transformation to be added to the composition. + + Examples: + >>> compose = Compose([RandomFlip(), RandomRotate()]) + >>> compose.append(RandomHSV()) + """ self.transforms.append(transform) def insert(self, index, transform): - """Inserts a new transform to the existing list of transforms.""" + """ + Inserts a new transform at a specified index in the existing list of transforms. + + Args: + index (int): The index at which to insert the new transform. + transform (BaseTransform): The transform object to be inserted. + + Examples: + >>> compose = Compose([Transform1(), Transform2()]) + >>> compose.insert(1, Transform3()) + >>> len(compose.transforms) + 3 + """ self.transforms.insert(index, transform) def __getitem__(self, index: Union[list, int]) -> "Compose": - """Retrieve a specific transform or a set of transforms using indexing.""" + """ + Retrieve a specific transform or a set of transforms using indexing. + + Args: + index (int | List[int]): Index or list of indices of the transforms to retrieve. + + Returns: + (Compose): A new Compose object containing the selected transform(s). + + Raises: + AssertionError: If the index is not of type int or list. + + Examples: + >>> transforms = [RandomFlip(), RandomRotate(10), RandomHSV(0.5, 0.5, 0.5)] + >>> compose = Compose(transforms) + >>> single_transform = compose[1] # Returns a Compose object with only RandomRotate + >>> multiple_transforms = compose[0:2] # Returns a Compose object with RandomFlip and RandomRotate + """ assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}" index = [index] if isinstance(index, int) else index return Compose([self.transforms[i] for i in index]) def __setitem__(self, index: Union[list, int], value: Union[list, int]) -> None: - """Retrieve a specific transform or a set of transforms using indexing.""" + """ + Sets one or more transforms in the composition using indexing. + + Args: + index (int | List[int]): Index or list of indices to set transforms at. + value (Any | List[Any]): Transform or list of transforms to set at the specified index(es). + + Raises: + AssertionError: If index type is invalid, value type doesn't match index type, or index is out of range. 
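A minimal stand-in mirroring the `Compose` interface documented above shows how sequential application and `append` interact; `ComposeSketch` is hypothetical and omits `insert`, indexing, and `tolist` for brevity.

```python
class ComposeSketch:
    # Minimal stand-in for the documented Compose interface.
    def __init__(self, transforms):
        self.transforms = transforms if isinstance(transforms, list) else [transforms]

    def __call__(self, data):
        for t in self.transforms:  # apply each transform in order
            data = t(data)
        return data

    def append(self, transform):
        self.transforms.append(transform)


pipeline = ComposeSketch([lambda x: x + 1, lambda x: x * 2])
pipeline.append(lambda x: x - 3)
print(pipeline(5))  # ((5 + 1) * 2) - 3 = 9
```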
+ + Examples: + >>> compose = Compose([Transform1(), Transform2(), Transform3()]) + >>> compose[1] = NewTransform() # Replace second transform + >>> compose[0:2] = [NewTransform1(), NewTransform2()] # Replace first two transforms + """ assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}" if isinstance(index, list): assert isinstance( @@ -103,29 +281,107 @@ class Compose: self.transforms[i] = v def tolist(self): - """Converts the list of transforms to a standard Python list.""" + """ + Converts the list of transforms to a standard Python list. + + Returns: + (List): A list containing all the transform objects in the Compose instance. + + Examples: + >>> transforms = [RandomFlip(), RandomRotate(10), RandomCrop()] + >>> compose = Compose(transforms) + >>> transform_list = compose.tolist() + >>> print(len(transform_list)) + 3 + """ return self.transforms def __repr__(self): - """Returns a string representation of the object.""" + """ + Returns a string representation of the Compose object. + + Returns: + (str): A string representation of the Compose object, including the list of transforms. + + Examples: + >>> transforms = [RandomFlip(), RandomAffine(degrees=10, translate=0.1, scale=0.1)] + >>> compose = Compose(transforms) + >>> print(compose) + Compose([ + RandomFlip(), + RandomAffine(degrees=10, translate=0.1, scale=0.1) + ]) + """ return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})" class BaseMixTransform: """ - Class for base mix (MixUp/Mosaic) transformations. + Base class for mix transformations like MixUp and Mosaic. + + This class provides a foundation for implementing mix transformations on datasets. It handles the + probability-based application of transforms and manages the mixing of multiple images and labels. + + Attributes: + dataset (Any): The dataset object containing images and labels. + pre_transform (Callable | None): Optional transform to apply before mixing. + p (float): Probability of applying the mix transformation. - This implementation is from mmyolo. + Methods: + __call__: Applies the mix transformation to the input labels. + _mix_transform: Abstract method to be implemented by subclasses for specific mix operations. + get_indexes: Abstract method to get indexes of images to be mixed. + _update_label_text: Updates label text for mixed images. + + Examples: + >>> class CustomMixTransform(BaseMixTransform): + ... def _mix_transform(self, labels): + ... # Implement custom mix logic here + ... return labels + ... def get_indexes(self): + ... return [random.randint(0, len(self.dataset) - 1) for _ in range(3)] + >>> dataset = YourDataset() + >>> transform = CustomMixTransform(dataset, p=0.5) + >>> mixed_labels = transform(original_labels) """ def __init__(self, dataset, pre_transform=None, p=0.0) -> None: - """Initializes the BaseMixTransform object with dataset, pre_transform, and probability.""" + """ + Initializes the BaseMixTransform object for mix transformations like MixUp and Mosaic. + + This class serves as a base for implementing mix transformations in image processing pipelines. + + Args: + dataset (Any): The dataset object containing images and labels for mixing. + pre_transform (Callable | None): Optional transform to apply before mixing. If None, no pre-transform is applied. + p (float): Probability of applying the mix transformation. Should be in the range [0.0, 1.0]. 
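The probability-gated flow that the class docstring describes (and that the `__call__` shown below implements) condenses to roughly the following runnable sketch; `MixSketch` is a hypothetical stand-in, with a toy list standing in for the dataset.

```python
import random


class MixSketch:
    # Condensed stand-in for the probability-gated mix-transform flow.
    def __init__(self, dataset, pre_transform=None, p=0.5):
        self.dataset, self.pre_transform, self.p = dataset, pre_transform, p

    def get_indexes(self):
        return [random.randint(0, len(self.dataset) - 1)]

    def _mix_transform(self, labels):
        labels["mixed"] = True  # a subclass would blend images/boxes here
        return labels

    def __call__(self, labels):
        if random.uniform(0, 1) > self.p:  # apply with probability p
            return labels
        mix = [self.dataset[i] for i in self.get_indexes()]
        if self.pre_transform:
            mix = [self.pre_transform(m) for m in mix]
        labels["mix_labels"] = mix
        return self._mix_transform(labels)


random.seed(0)
dataset = [{"img": i} for i in range(10)]  # toy stand-in dataset
print(MixSketch(dataset, p=1.0)({"img": "base"}))  # gains 'mix_labels' and 'mixed'
```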
+ + Examples: + >>> dataset = YOLODataset("path/to/data") + >>> pre_transform = Compose([RandomFlip(), RandomRotate()]) + >>> mix_transform = BaseMixTransform(dataset, pre_transform, p=0.5) + """ self.dataset = dataset self.pre_transform = pre_transform self.p = p def __call__(self, labels): - """Applies pre-processing transforms and mixup/mosaic transforms to labels data.""" + """ + Applies pre-processing transforms and mixup/mosaic transforms to labels data. + + This method determines whether to apply the mix transform based on a probability factor. If applied, it + selects additional images, applies pre-transforms if specified, and then performs the mix transform. + + Args: + labels (Dict): A dictionary containing label data for an image. + + Returns: + (Dict): The transformed labels dictionary, which may include mixed data from other images. + + Examples: + >>> transform = BaseMixTransform(dataset, pre_transform=None, p=0.5) + >>> result = transform({"image": img, "bboxes": boxes, "cls": classes}) + """ if random.uniform(0, 1) > self.p: return labels @@ -150,15 +406,73 @@ class BaseMixTransform: return labels def _mix_transform(self, labels): - """Applies MixUp or Mosaic augmentation to the label dictionary.""" + """ + Applies MixUp or Mosaic augmentation to the label dictionary. + + This method should be implemented by subclasses to perform specific mix transformations like MixUp or + Mosaic. It modifies the input label dictionary in-place with the augmented data. + + Args: + labels (Dict): A dictionary containing image and label data. Expected to have a 'mix_labels' key + with a list of additional image and label data for mixing. + + Returns: + (Dict): The modified labels dictionary with augmented data after applying the mix transform. + + Examples: + >>> transform = MixUpTransform(dataset) + >>> labels = {'image': img, 'bboxes': boxes, 'mix_labels': [{'image': img2, 'bboxes': boxes2}]} + >>> augmented_labels = transform._mix_transform(labels) + """ raise NotImplementedError def get_indexes(self): - """Gets a list of shuffled indexes for mosaic augmentation.""" + """ + Gets a list of shuffled indexes for mosaic augmentation. + + Returns: + (List[int]): A list of shuffled indexes from the dataset. + + Examples: + >>> transform = BaseMixTransform(dataset) + >>> indexes = transform.get_indexes() + >>> print(indexes) # [3, 18, 7, 2] + """ raise NotImplementedError def _update_label_text(self, labels): - """Update label text.""" + """ + Updates label text and class IDs for mixed labels in image augmentation. + + This method processes the 'texts' and 'cls' fields of the input labels dictionary and any mixed labels, + creating a unified set of text labels and updating class IDs accordingly. + + Args: + labels (Dict): A dictionary containing label information, including 'texts' and 'cls' fields, + and optionally a 'mix_labels' field with additional label dictionaries. + + Returns: + (Dict): The updated labels dictionary with unified text labels and updated class IDs. + + Examples: + >>> labels = { + ... 'texts': [['cat'], ['dog']], + ... 'cls': torch.tensor([[0], [1]]), + ... 'mix_labels': [{ + ... 'texts': [['bird'], ['fish']], + ... 'cls': torch.tensor([[0], [1]]) + ... }] + ... 
} >>> updated_labels = self._update_label_text(labels) >>> print(updated_labels['texts']) [['cat'], ['dog'], ['bird'], ['fish']] >>> print(updated_labels['cls']) tensor([[0], [1]]) >>> print(updated_labels['mix_labels'][0]['cls']) tensor([[2], [3]]) """ if "texts" not in labels: return labels @@ -176,20 +490,52 @@ class BaseMixTransform: class Mosaic(BaseMixTransform): """ - Mosaic augmentation. + Mosaic augmentation for image datasets. This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image. The augmentation is applied to a dataset with a given probability. Attributes: dataset: The dataset on which the mosaic augmentation is applied. - imgsz (int, optional): Image size (height and width) after mosaic pipeline of a single image. Default to 640. - p (float, optional): Probability of applying the mosaic augmentation. Must be in the range 0-1. Default to 1.0. - n (int, optional): The grid size, either 4 (for 2x2) or 9 (for 3x3). + imgsz (int): Image size (height and width) after mosaic pipeline of a single image. + p (float): Probability of applying the mosaic augmentation. Must be in the range 0-1. + n (int): The grid size, either 4 (for 2x2) or 9 (for 3x3). + border (Tuple[int, int]): Border size for width and height. + + Methods: + get_indexes: Returns a list of random indexes from the dataset. + _mix_transform: Applies mosaic augmentation to the input image and labels. + _mosaic3: Creates a 1x3 image mosaic. + _mosaic4: Creates a 2x2 image mosaic. + _mosaic9: Creates a 3x3 image mosaic. + _update_labels: Updates labels with padding. + _cat_labels: Concatenates labels and clips mosaic border instances. + + Examples: + >>> from ultralytics.data.augment import Mosaic + >>> dataset = YourDataset(...) # Your image dataset + >>> mosaic_aug = Mosaic(dataset, imgsz=640, p=0.5, n=4) + >>> augmented_labels = mosaic_aug(original_labels) """ def __init__(self, dataset, imgsz=640, p=1.0, n=4): - """Initializes the object with a dataset, image size, probability, and border.""" + """ + Initializes the Mosaic augmentation object. + + This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image. + The augmentation is applied to a dataset with a given probability. + + Args: + dataset (Any): The dataset on which the mosaic augmentation is applied. + imgsz (int): Image size (height and width) after mosaic pipeline of a single image. + p (float): Probability of applying the mosaic augmentation. Must be in the range 0-1. + n (int): The grid size, either 4 (for 2x2) or 9 (for 3x3). + + Examples: + >>> from ultralytics.data.augment import Mosaic + >>> dataset = YourDataset(...) + >>> mosaic_aug = Mosaic(dataset, imgsz=640, p=0.5, n=4) + """ assert 0 <= p <= 1.0, f"The probability should be in range [0, 1], but got {p}." assert n in {4, 9}, "grid must be equal to 4 or 9." super().__init__(dataset=dataset, p=p) @@ -199,14 +545,53 @@ class Mosaic(BaseMixTransform): self.n = n def get_indexes(self, buffer=True): - """Return a list of random indexes from the dataset.""" + """ + Returns a list of random indexes from the dataset for mosaic augmentation. + + This method selects random image indexes either from a buffer or from the entire dataset, depending on + the 'buffer' parameter. It is used to choose images for creating mosaic augmentations. + + Args: + buffer (bool): If True, selects images from the dataset buffer. If False, selects from the entire + dataset.
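Assuming the sampling behavior documented above, the index selection reduces to a couple of lines; `mosaic_indexes_sketch` is a hypothetical illustration, not the bound method.

```python
import random


def mosaic_indexes_sketch(dataset_len, n=4, buffer=None):
    # Choose n - 1 partner images for an n-image mosaic: sample from a
    # recent-image buffer when one is available, else from the whole dataset.
    if buffer:
        return random.choices(buffer, k=n - 1)
    return [random.randint(0, dataset_len - 1) for _ in range(n - 1)]


print(mosaic_indexes_sketch(100, n=4))                     # three random indexes
print(mosaic_indexes_sketch(100, n=9, buffer=[3, 7, 42]))  # eight picks from the buffer
```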
+
+        Returns:
+            (List[int]): A list of random image indexes. The length of the list is n-1, where n is the number
+                of images used in the mosaic (either 3 or 8, depending on whether n is 4 or 9).
+
+        Examples:
+            >>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=4)
+            >>> indexes = mosaic.get_indexes()
+            >>> print(len(indexes))  # Output: 3
+        """
         if buffer:  # select images from buffer
             return random.choices(list(self.dataset.buffer), k=self.n - 1)
         else:  # select any images
             return [random.randint(0, len(self.dataset) - 1) for _ in range(self.n - 1)]
 
     def _mix_transform(self, labels):
-        """Apply mixup transformation to the input image and labels."""
+        """
+        Applies mosaic augmentation to the input image and labels.
+
+        This method combines multiple images (3, 4, or 9) into a single mosaic image based on the 'n' attribute.
+        It ensures that rectangular annotations are not present and that there are other images available for
+        mosaic augmentation.
+
+        Args:
+            labels (Dict): A dictionary containing image data and annotations. Expected keys include:
+                - 'rect_shape': Should be None as rect and mosaic are mutually exclusive.
+                - 'mix_labels': A list of dictionaries containing data for other images to be used in the mosaic.
+
+        Returns:
+            (Dict): A dictionary containing the mosaic-augmented image and updated annotations.
+
+        Raises:
+            AssertionError: If 'rect_shape' is not None or if 'mix_labels' is empty.
+
+        Examples:
+            >>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=4)
+            >>> augmented_data = mosaic._mix_transform(labels)
+        """
         assert labels.get("rect_shape", None) is None, "rect and mosaic are mutually exclusive."
         assert len(labels.get("mix_labels", [])), "There are no other images for mosaic augment."
         return (
@@ -214,7 +599,29 @@
         )  # This code is modified for mosaic3 method.
 
     def _mosaic3(self, labels):
-        """Create a 1x3 image mosaic."""
+        """
+        Creates a 1x3 image mosaic by combining three images.
+
+        This method arranges three images in a horizontal layout, with the main image in the center and two
+        additional images on either side. It's part of the Mosaic augmentation technique used in object detection.
+
+        Args:
+            labels (Dict): A dictionary containing image and label information for the main (center) image.
+                Must include 'img' key with the image array, and 'mix_labels' key with a list of two
+                dictionaries containing information for the side images.
+
+        Returns:
+            (Dict): A dictionary with the mosaic image and updated labels. Keys include:
+                - 'img' (np.ndarray): The mosaic image array with shape (H, W, C).
+                - Other keys from the input labels, updated to reflect the new image dimensions.
+
+        Examples:
+            >>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=4)  # __init__ only accepts n=4 or n=9
+            >>> labels = {'img': np.random.rand(480, 640, 3), 'mix_labels': [{'img': np.random.rand(480, 640, 3)} for _ in range(2)]}
+            >>> result = mosaic._mosaic3(labels)  # the 1x3 layout can also be invoked directly
+            >>> print(result['img'].shape)
+            (640, 640, 3)
+        """
         mosaic_labels = []
         s = self.imgsz
         for i in range(3):
@@ -248,7 +655,29 @@
         return final_labels
 
     def _mosaic4(self, labels):
-        """Create a 2x2 image mosaic."""
+        """
+        Creates a 2x2 image mosaic from four input images.
+
+        This method combines four images into a single mosaic image by placing them in a 2x2 grid. It also
+        updates the corresponding labels for each image in the mosaic.
+
+        Args:
+            labels (Dict): A dictionary containing image data and labels for the base image (index 0) and
+                three additional images (indices 1-3) in the 'mix_labels' key.
+ + Returns: + (Dict): A dictionary containing the mosaic image and updated labels. The 'img' key contains the + mosaic image as a numpy array, and other keys contain the combined and adjusted labels for + all four images. + + Examples: + >>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=4) + >>> labels = {"img": np.random.rand(480, 640, 3), "mix_labels": [ + ... {"img": np.random.rand(480, 640, 3)} for _ in range(3) + ... ]} + >>> result = mosaic._mosaic4(labels) + >>> assert result["img"].shape == (1280, 1280, 3) + """ mosaic_labels = [] s = self.imgsz yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border) # mosaic center x, y @@ -284,7 +713,31 @@ class Mosaic(BaseMixTransform): return final_labels def _mosaic9(self, labels): - """Create a 3x3 image mosaic.""" + """ + Creates a 3x3 image mosaic from the input image and eight additional images. + + This method combines nine images into a single mosaic image. The input image is placed at the center, + and eight additional images from the dataset are placed around it in a 3x3 grid pattern. + + Args: + labels (Dict): A dictionary containing the input image and its associated labels. It should have + the following keys: + - 'img' (numpy.ndarray): The input image. + - 'resized_shape' (Tuple[int, int]): The shape of the resized image (height, width). + - 'mix_labels' (List[Dict]): A list of dictionaries containing information for the additional + eight images, each with the same structure as the input labels. + + Returns: + (Dict): A dictionary containing the mosaic image and updated labels. It includes the following keys: + - 'img' (numpy.ndarray): The final mosaic image. + - Other keys from the input labels, updated to reflect the new mosaic arrangement. + + Examples: + >>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=9) + >>> input_labels = dataset[0] + >>> mosaic_result = mosaic._mosaic9(input_labels) + >>> mosaic_image = mosaic_result['img'] + """ mosaic_labels = [] s = self.imgsz hp, wp = -1, -1 # height, width previous @@ -333,7 +786,25 @@ class Mosaic(BaseMixTransform): @staticmethod def _update_labels(labels, padw, padh): - """Update labels.""" + """ + Updates label coordinates with padding values. + + This method adjusts the bounding box coordinates of object instances in the labels by adding padding + values. It also denormalizes the coordinates if they were previously normalized. + + Args: + labels (Dict): A dictionary containing image and instance information. + padw (int): Padding width to be added to the x-coordinates. + padh (int): Padding height to be added to the y-coordinates. + + Returns: + (Dict): Updated labels dictionary with adjusted instance coordinates. + + Examples: + >>> labels = {"img": np.zeros((100, 100, 3)), "instances": Instances(...)} + >>> padw, padh = 50, 50 + >>> updated_labels = Mosaic._update_labels(labels, padw, padh) + """ nh, nw = labels["img"].shape[:2] labels["instances"].convert_bbox(format="xyxy") labels["instances"].denormalize(nw, nh) @@ -341,7 +812,32 @@ class Mosaic(BaseMixTransform): return labels def _cat_labels(self, mosaic_labels): - """Return labels with mosaic border instances clipped.""" + """ + Concatenates and processes labels for mosaic augmentation. + + This method combines labels from multiple images used in mosaic augmentation, clips instances to the + mosaic border, and removes zero-area boxes. + + Args: + mosaic_labels (List[Dict]): A list of label dictionaries for each image in the mosaic. 
+ + Returns: + (Dict): A dictionary containing concatenated and processed labels for the mosaic image, including: + - im_file (str): File path of the first image in the mosaic. + - ori_shape (Tuple[int, int]): Original shape of the first image. + - resized_shape (Tuple[int, int]): Shape of the mosaic image (imgsz * 2, imgsz * 2). + - cls (np.ndarray): Concatenated class labels. + - instances (Instances): Concatenated instance annotations. + - mosaic_border (Tuple[int, int]): Mosaic border size. + - texts (List[str], optional): Text labels if present in the original labels. + + Examples: + >>> mosaic = Mosaic(dataset, imgsz=640) + >>> mosaic_labels = [{"cls": np.array([0, 1]), "instances": Instances(...)} for _ in range(4)] + >>> result = mosaic._cat_labels(mosaic_labels) + >>> print(result.keys()) + dict_keys(['im_file', 'ori_shape', 'resized_shape', 'cls', 'instances', 'mosaic_border']) + """ if len(mosaic_labels) == 0: return {} cls = [] @@ -368,18 +864,82 @@ class Mosaic(BaseMixTransform): class MixUp(BaseMixTransform): - """Class for applying MixUp augmentation to the dataset.""" + """ + Applies MixUp augmentation to image datasets. + + This class implements the MixUp augmentation technique as described in the paper "mixup: Beyond Empirical Risk + Minimization" (https://arxiv.org/abs/1710.09412). MixUp combines two images and their labels using a random weight. + + Attributes: + dataset (Any): The dataset to which MixUp augmentation will be applied. + pre_transform (Callable | None): Optional transform to apply before MixUp. + p (float): Probability of applying MixUp augmentation. + + Methods: + get_indexes: Returns a random index from the dataset. + _mix_transform: Applies MixUp augmentation to the input labels. + + Examples: + >>> from ultralytics.data.augment import MixUp + >>> dataset = YourDataset(...) # Your image dataset + >>> mixup = MixUp(dataset, p=0.5) + >>> augmented_labels = mixup(original_labels) + """ def __init__(self, dataset, pre_transform=None, p=0.0) -> None: - """Initializes MixUp object with dataset, pre_transform, and probability of applying MixUp.""" + """ + Initializes the MixUp augmentation object. + + MixUp is an image augmentation technique that combines two images by taking a weighted sum of their pixel + values and labels. This implementation is designed for use with the Ultralytics YOLO framework. + + Args: + dataset (Any): The dataset to which MixUp augmentation will be applied. + pre_transform (Callable | None): Optional transform to apply to images before MixUp. + p (float): Probability of applying MixUp augmentation to an image. Must be in the range [0, 1]. + + Examples: + >>> from ultralytics.data.dataset import YOLODataset + >>> dataset = YOLODataset('path/to/data.yaml') + >>> mixup = MixUp(dataset, pre_transform=None, p=0.5) + """ super().__init__(dataset=dataset, pre_transform=pre_transform, p=p) def get_indexes(self): - """Get a random index from the dataset.""" + """ + Get a random index from the dataset. + + This method returns a single random index from the dataset, which is used to select an image for MixUp + augmentation. + + Returns: + (int): A random integer index within the range of the dataset length. + + Examples: + >>> mixup = MixUp(dataset) + >>> index = mixup.get_indexes() + >>> print(index) + 42 + """ return random.randint(0, len(self.dataset) - 1) def _mix_transform(self, labels): - """Applies MixUp augmentation as per https://arxiv.org/pdf/1710.09412.pdf.""" + """ + Applies MixUp augmentation to the input labels. 
+
+        This method implements the MixUp augmentation technique as described in the paper
+        "mixup: Beyond Empirical Risk Minimization" (https://arxiv.org/abs/1710.09412).
+
+        Args:
+            labels (Dict): A dictionary containing the original image and label information.
+
+        Returns:
+            (Dict): A dictionary containing the mixed-up image and combined label information.
+
+        Examples:
+            >>> mixer = MixUp(dataset)
+            >>> mixed_labels = mixer._mix_transform(labels)
+        """
         r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
         labels2 = labels["mix_labels"][0]
         labels["img"] = (labels["img"] * r + labels2["img"] * (1 - r)).astype(np.uint8)
@@ -390,32 +950,62 @@ class MixUp(BaseMixTransform):
 
 class RandomPerspective:
     """
-    Implements random perspective and affine transformations on images and corresponding bounding boxes, segments, and
-    keypoints. These transformations include rotation, translation, scaling, and shearing. The class also offers the
-    option to apply these transformations conditionally with a specified probability.
+    Implements random perspective and affine transformations on images and corresponding annotations.
+
+    This class applies random rotations, translations, scaling, shearing, and perspective transformations
+    to images and their associated bounding boxes, segments, and keypoints. It can be used as part of an
+    augmentation pipeline for object detection and instance segmentation tasks.
 
     Attributes:
-        degrees (float): Degree range for random rotations.
-        translate (float): Fraction of total width and height for random translation.
-        scale (float): Scaling factor interval, e.g., a scale factor of 0.1 allows a resize between 90%-110%.
-        shear (float): Shear intensity (angle in degrees).
+        degrees (float): Maximum absolute degree range for random rotations.
+        translate (float): Maximum translation as a fraction of the image size.
+        scale (float): Scaling factor range, e.g., scale=0.1 means 0.9-1.1.
+        shear (float): Maximum shear angle in degrees.
         perspective (float): Perspective distortion factor.
-        border (tuple): Tuple specifying mosaic border.
-        pre_transform (callable): A function/transform to apply to the image before starting the random transformation.
+        border (Tuple[int, int]): Mosaic border size as (top/bottom, left/right).
+        pre_transform (callable): Optional transform to apply before the random perspective.
 
     Methods:
-        affine_transform(img, border): Applies a series of affine transformations to the image.
-        apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix.
-        apply_segments(segments, M): Transforms segments and generates new bounding boxes.
-        apply_keypoints(keypoints, M): Transforms keypoints.
-        __call__(labels): Main method to apply transformations to both images and their corresponding annotations.
-        box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation.
+        affine_transform: Applies affine transformations to the input image.
+        apply_bboxes: Transforms bounding boxes using the affine matrix.
+        apply_segments: Transforms segments and generates new bounding boxes.
+        apply_keypoints: Transforms keypoints using the affine matrix.
+        __call__: Applies the random perspective transformation to images and annotations.
+        box_candidates: Filters transformed bounding boxes based on size and aspect ratio.
+
+    Examples:
+        >>> transform = RandomPerspective(degrees=10, translate=0.1, scale=0.1, shear=10)
+        >>> image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
+        >>> labels = {'img': image, 'cls': np.array([0, 1]), 'instances': Instances(...)}
+        >>> result = transform(labels)
+        >>> transformed_image = result['img']
+        >>> transformed_instances = result['instances']
     """
 
     def __init__(
         self, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, border=(0, 0), pre_transform=None
     ):
-        """Initializes RandomPerspective object with transformation parameters."""
+        """
+        Initializes RandomPerspective object with transformation parameters.
+
+        This class implements random perspective and affine transformations on images and corresponding
+        bounding boxes, segments, and keypoints. Transformations include rotation, translation, scaling,
+        and shearing.
+
+        Args:
+            degrees (float): Degree range for random rotations.
+            translate (float): Fraction of total width and height for random translation.
+            scale (float): Scaling factor interval, e.g., a scale factor of 0.5 allows a resize between 50%-150%.
+            shear (float): Shear intensity (angle in degrees).
+            perspective (float): Perspective distortion factor.
+            border (Tuple[int, int]): Tuple specifying mosaic border (top/bottom, left/right).
+            pre_transform (Callable | None): Function/transform to apply to the image before starting the
+                random transformation.
+
+        Examples:
+            >>> transform = RandomPerspective(degrees=10.0, translate=0.1, scale=0.5, shear=5.0)
+            >>> result = transform(labels)  # Apply random perspective to labels
+        """
 
         self.degrees = degrees
         self.translate = translate
@@ -429,14 +1019,25 @@ class RandomPerspective:
         """
         Applies a sequence of affine transformations centered around the image center.
 
+        This function performs a series of geometric transformations on the input image, including
+        translation, perspective change, rotation, scaling, and shearing. The transformations are
+        applied in a specific order to maintain consistency.
+
         Args:
-            img (ndarray): Input image.
-            border (tuple): Border dimensions.
+            img (np.ndarray): Input image to be transformed.
+            border (Tuple[int, int]): Border dimensions for the transformed image.
 
         Returns:
-            img (ndarray): Transformed image.
-            M (ndarray): Transformation matrix.
-            s (float): Scale factor.
+            (Tuple[np.ndarray, np.ndarray, float]): A tuple containing:
+                - np.ndarray: Transformed image.
+                - np.ndarray: 3x3 transformation matrix.
+                - float: Scale factor applied during the transformation.
+
+        Examples:
+            >>> import numpy as np
+            >>> rp = RandomPerspective()
+            >>> img = np.random.rand(100, 100, 3)
+            >>> border = (10, 10)
+            >>> transformed_img, matrix, scale = rp.affine_transform(img, border)
         """
 
         # Center
@@ -480,14 +1081,23 @@
 
     def apply_bboxes(self, bboxes, M):
         """
-        Apply affine to bboxes only.
+        Apply affine transformation to bounding boxes.
+
+        This function applies an affine transformation to a set of bounding boxes using the provided
+        transformation matrix.
 
         Args:
-            bboxes (ndarray): list of bboxes, xyxy format, with shape (num_bboxes, 4).
-            M (ndarray): affine matrix.
+            bboxes (np.ndarray): Bounding boxes in xyxy format with shape (N, 4), where N is the number
+                of bounding boxes.
+            M (np.ndarray): Affine transformation matrix with shape (3, 3).
 
         Returns:
-            new_bboxes (ndarray): bboxes after affine, [num_bboxes, 4].
+            (np.ndarray): Transformed bounding boxes in xyxy format with shape (N, 4).
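+
+        Notes:
+            A sketch of the transform as implemented upstream: each box is expanded to its four corners in
+            homogeneous coordinates and multiplied by M.T; when a perspective component is present the points
+            are rescaled by their third coordinate, and the output xyxy box is recovered as the min/max over
+            the transformed corners.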
+
+        Examples:
+            >>> rp = RandomPerspective()
+            >>> bboxes = np.array([[10, 10, 20, 20], [30, 30, 40, 40]], dtype=np.float32)
+            >>> M = np.eye(3)
+            >>> transformed_bboxes = rp.apply_bboxes(bboxes, M)
         """
         n = len(bboxes)
         if n == 0:
@@ -505,15 +1115,25 @@ class RandomPerspective:
 
     def apply_segments(self, segments, M):
         """
-        Apply affine to segments and generate new bboxes from segments.
+        Apply affine transformations to segments and generate new bounding boxes.
+
+        This function applies affine transformations to input segments and generates new bounding boxes based on
+        the transformed segments. It clips the transformed segments to fit within the new bounding boxes.
 
         Args:
-            segments (ndarray): list of segments, [num_samples, 500, 2].
-            M (ndarray): affine matrix.
+            segments (np.ndarray): Input segments with shape (N, M, 2), where N is the number of segments and M
+                is the number of points in each segment.
+            M (np.ndarray): Affine transformation matrix with shape (3, 3).
 
         Returns:
-            new_segments (ndarray): list of segments after affine, [num_samples, 500, 2].
-            new_bboxes (ndarray): bboxes after affine, [N, 4].
+            (Tuple[np.ndarray, np.ndarray]): A tuple containing:
+                - New bounding boxes with shape (N, 4) in xyxy format.
+                - Transformed and clipped segments with shape (N, M, 2).
+
+        Examples:
+            >>> rp = RandomPerspective()
+            >>> segments = np.random.rand(10, 500, 2)  # 10 segments with 500 points each
+            >>> M = np.eye(3)  # Identity transformation matrix
+            >>> new_bboxes, new_segments = rp.apply_segments(segments, M)
         """
         n, num = segments.shape[:2]
         if n == 0:
@@ -532,14 +1152,25 @@ class RandomPerspective:
 
     def apply_keypoints(self, keypoints, M):
         """
-        Apply affine to keypoints.
+        Applies affine transformation to keypoints.
+
+        This method transforms the input keypoints using the provided affine transformation matrix. It handles
+        perspective rescaling if necessary and updates the visibility of keypoints that fall outside the image
+        boundaries after transformation.
 
         Args:
-            keypoints (ndarray): keypoints, [N, 17, 3].
-            M (ndarray): affine matrix.
+            keypoints (np.ndarray): Array of keypoints with shape (N, 17, 3), where N is the number of instances,
+                17 is the number of keypoints per instance, and 3 represents (x, y, visibility).
+            M (np.ndarray): 3x3 affine transformation matrix.
 
         Returns:
-            new_keypoints (ndarray): keypoints after affine, [N, 17, 3].
+            (np.ndarray): Transformed keypoints array with the same shape as input (N, 17, 3).
+
+        Examples:
+            >>> random_perspective = RandomPerspective()
+            >>> keypoints = np.random.rand(5, 17, 3)  # 5 instances, 17 keypoints each
+            >>> M = np.eye(3)  # Identity transformation
+            >>> transformed_keypoints = random_perspective.apply_keypoints(keypoints, M)
         """
         n, nkpt = keypoints.shape[:2]
         if n == 0:
@@ -555,10 +1186,38 @@ class RandomPerspective:
 
     def __call__(self, labels):
         """
-        Affine images and targets.
+        Applies random perspective and affine transformations to an image and its associated labels.
+
+        This method performs a series of transformations including rotation, translation, scaling, shearing,
+        and perspective distortion on the input image and adjusts the corresponding bounding boxes, segments,
+        and keypoints accordingly.
 
         Args:
-            labels (dict): a dict of `bboxes`, `segments`, `keypoints`.
+            labels (Dict): A dictionary containing image data and annotations.
+                Must include:
+                    'img' (ndarray): The input image.
+                    'cls' (ndarray): Class labels.
+                    'instances' (Instances): Object instances with bounding boxes, segments, and keypoints.
+                May include:
+                    'mosaic_border' (Tuple[int, int]): Border size for mosaic augmentation.
+ + Returns: + (Dict): Transformed labels dictionary containing: + 'img' (ndarray): The transformed image. + 'cls' (ndarray): Updated class labels. + 'instances' (Instances): Updated object instances. + 'resized_shape' (Tuple[int, int]): New image shape after transformation. + + Examples: + >>> transform = RandomPerspective() + >>> image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + >>> labels = { + ... 'img': image, + ... 'cls': np.array([0, 1, 2]), + ... 'instances': Instances(bboxes=np.array([[10, 10, 50, 50], [100, 100, 150, 150]])) + ... } + >>> result = transform(labels) + >>> assert result['img'].shape[:2] == result['resized_shape'] """ if self.pre_transform and "mosaic_border" not in labels: labels = self.pre_transform(labels) @@ -605,19 +1264,36 @@ class RandomPerspective: def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): """ - Compute box candidates based on a set of thresholds. This method compares the characteristics of the boxes - before and after augmentation to decide whether a box is a candidate for further processing. + Compute candidate boxes for further processing based on size and aspect ratio criteria. + + This method compares boxes before and after augmentation to determine if they meet specified + thresholds for width, height, aspect ratio, and area. It's used to filter out boxes that have + been overly distorted or reduced by the augmentation process. Args: - box1 (numpy.ndarray): The 4,n bounding box before augmentation, represented as [x1, y1, x2, y2]. - box2 (numpy.ndarray): The 4,n bounding box after augmentation, represented as [x1, y1, x2, y2]. - wh_thr (float, optional): The width and height threshold in pixels. Default is 2. - ar_thr (float, optional): The aspect ratio threshold. Default is 100. - area_thr (float, optional): The area ratio threshold. Default is 0.1. - eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16. + box1 (numpy.ndarray): Original boxes before augmentation, shape (4, n) where n is the + number of boxes. Format is [x1, y1, x2, y2] in absolute coordinates. + box2 (numpy.ndarray): Augmented boxes after transformation, shape (4, n). Format is + [x1, y1, x2, y2] in absolute coordinates. + wh_thr (float): Width and height threshold in pixels. Boxes smaller than this in either + dimension are rejected. + ar_thr (float): Aspect ratio threshold. Boxes with an aspect ratio greater than this + value are rejected. + area_thr (float): Area ratio threshold. Boxes with an area ratio (new/old) less than + this value are rejected. + eps (float): Small epsilon value to prevent division by zero. Returns: - (numpy.ndarray): A boolean array indicating which boxes are candidates based on the given thresholds. + (numpy.ndarray): Boolean array of shape (n,) indicating which boxes are candidates. + True values correspond to boxes that meet all criteria. + + Examples: + >>> random_perspective = RandomPerspective() + >>> box1 = np.array([[0, 0, 100, 100], [0, 0, 50, 50]]).T + >>> box2 = np.array([[10, 10, 90, 90], [5, 5, 45, 45]]).T + >>> candidates = random_perspective.box_candidates(box1, box2) + >>> print(candidates) + [True True] """ w1, h1 = box1[2] - box1[0], box1[3] - box1[1] w2, h2 = box2[2] - box2[0], box2[3] - box2[1] @@ -627,20 +1303,42 @@ class RandomPerspective: class RandomHSV: """ - This class is responsible for performing random adjustments to the Hue, Saturation, and Value (HSV) channels of an - image. 
+    Randomly adjusts the Hue, Saturation, and Value (HSV) channels of an image.
+
+    This class applies random HSV augmentation to images within predefined limits set by hgain, sgain, and vgain.
+
+    Attributes:
+        hgain (float): Maximum variation for hue. Range is typically [0, 1].
+        sgain (float): Maximum variation for saturation. Range is typically [0, 1].
+        vgain (float): Maximum variation for value. Range is typically [0, 1].
 
-    The adjustments are random but within limits set by hgain, sgain, and vgain.
+    Methods:
+        __call__: Applies random HSV augmentation to an image.
+
+    Examples:
+        >>> import numpy as np
+        >>> from ultralytics.data.augment import RandomHSV
+        >>> augmenter = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
+        >>> image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
+        >>> labels = {"img": image}
+        >>> augmented_labels = augmenter(labels)
+        >>> augmented_image = augmented_labels["img"]
     """
 
     def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
         """
-        Initialize RandomHSV class with gains for each HSV channel.
+        Initializes the RandomHSV object for random HSV (Hue, Saturation, Value) augmentation.
+
+        This class applies random adjustments to the HSV channels of an image within specified limits.
 
         Args:
-            hgain (float, optional): Maximum variation for hue. Default is 0.5.
-            sgain (float, optional): Maximum variation for saturation. Default is 0.5.
-            vgain (float, optional): Maximum variation for value. Default is 0.5.
+            hgain (float): Maximum variation for hue. Should be in the range [0, 1].
+            sgain (float): Maximum variation for saturation. Should be in the range [0, 1].
+            vgain (float): Maximum variation for value. Should be in the range [0, 1].
+
+        Examples:
+            >>> hsv_aug = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
+            >>> labels = {"img": np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)}
+            >>> hsv_aug(labels)
+            >>> augmented_image = labels["img"]
         """
         self.hgain = hgain
         self.sgain = sgain
@@ -648,9 +1346,24 @@ class RandomHSV:
 
     def __call__(self, labels):
         """
-        Applies random HSV augmentation to an image within the predefined limits.
+        Applies random HSV augmentation to an image within predefined limits.
 
-        The modified image replaces the original image in the input 'labels' dict.
+        This method modifies the input image by randomly adjusting its Hue, Saturation, and Value (HSV) channels.
+        The adjustments are made within the limits set by hgain, sgain, and vgain during initialization.
+
+        Args:
+            labels (Dict): A dictionary containing image data and metadata. Must include an 'img' key with
+                the image as a numpy array.
+
+        Returns:
+            (Dict): The input labels dictionary with the 'img' key updated in-place to the HSV-augmented image.
+
+        Examples:
+            >>> hsv_augmenter = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
+            >>> labels = {'img': np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)}
+            >>> hsv_augmenter(labels)
+            >>> augmented_img = labels['img']
         """
         img = labels["img"]
         if self.hgain or self.sgain or self.vgain:
@@ -672,18 +1385,43 @@ class RandomFlip:
     """
     Applies a random horizontal or vertical flip to an image with a given probability.
 
-    Also updates any instances (bounding boxes, keypoints, etc.) accordingly.
+    This class performs random image flipping and updates corresponding instance annotations such as
+    bounding boxes and keypoints.
+
+    Attributes:
+        p (float): Probability of applying the flip. Must be between 0 and 1.
+        direction (str): Direction of flip, either 'horizontal' or 'vertical'.
+        flip_idx (array-like): Index mapping for flipping keypoints, if applicable.
+ + Methods: + __call__: Applies the random flip transformation to an image and its annotations. + + Examples: + >>> transform = RandomFlip(p=0.5, direction='horizontal') + >>> result = transform({"img": image, "instances": instances}) + >>> flipped_image = result["img"] + >>> flipped_instances = result["instances"] """ def __init__(self, p=0.5, direction="horizontal", flip_idx=None) -> None: """ Initializes the RandomFlip class with probability and direction. + This class applies a random horizontal or vertical flip to an image with a given probability. + It also updates any instances (bounding boxes, keypoints, etc.) accordingly. + Args: - p (float, optional): The probability of applying the flip. Must be between 0 and 1. Default is 0.5. - direction (str, optional): The direction to apply the flip. Must be 'horizontal' or 'vertical'. + p (float): The probability of applying the flip. Must be between 0 and 1. Default is 0.5. + direction (str): The direction to apply the flip. Must be 'horizontal' or 'vertical'. Default is 'horizontal'. - flip_idx (array-like, optional): Index mapping for flipping keypoints, if any. + flip_idx (List[int] | None): Index mapping for flipping keypoints, if any. + + Raises: + AssertionError: If direction is not 'horizontal' or 'vertical', or if p is not between 0 and 1. + + Examples: + >>> flip = RandomFlip(p=0.5, direction='horizontal') + >>> flip = RandomFlip(p=0.7, direction='vertical', flip_idx=[1, 0, 3, 2, 5, 4]) """ assert direction in {"horizontal", "vertical"}, f"Support direction `horizontal` or `vertical`, got {direction}" assert 0 <= p <= 1.0, f"The probability should be in range [0, 1], but got {p}." @@ -696,12 +1434,25 @@ class RandomFlip: """ Applies random flip to an image and updates any instances like bounding boxes or keypoints accordingly. + This method randomly flips the input image either horizontally or vertically based on the initialized + probability and direction. It also updates the corresponding instances (bounding boxes, keypoints) to + match the flipped image. + Args: - labels (dict): A dictionary containing the keys 'img' and 'instances'. 'img' is the image to be flipped. - 'instances' is an object containing bounding boxes and optionally keypoints. + labels (Dict): A dictionary containing the following keys: + 'img' (numpy.ndarray): The image to be flipped. + 'instances' (ultralytics.utils.instance.Instances): An object containing bounding boxes and + optionally keypoints. Returns: - (dict): The same dict with the flipped image and updated instances under the 'img' and 'instances' keys. + (Dict): The same dictionary with the flipped image and updated instances: + 'img' (numpy.ndarray): The flipped image. + 'instances' (ultralytics.utils.instance.Instances): Updated instances matching the flipped image. + + Examples: + >>> labels = {'img': np.random.rand(640, 640, 3), 'instances': Instances(...)} + >>> random_flip = RandomFlip(p=0.5, direction='horizontal') + >>> flipped_labels = random_flip(labels) """ img = labels["img"] instances = labels.pop("instances") @@ -726,10 +1477,56 @@ class RandomFlip: class LetterBox: - """Resize image and padding for detection, instance segmentation, pose.""" + """ + Resize image and padding for detection, instance segmentation, pose. + + This class resizes and pads images to a specified shape while preserving aspect ratio. It also updates + corresponding labels and bounding boxes. + + Attributes: + new_shape (tuple): Target shape (height, width) for resizing. 
+        auto (bool): Whether to use minimum rectangle.
+        scaleFill (bool): Whether to stretch the image to new_shape.
+        scaleup (bool): Whether to allow scaling up. If False, only scale down.
+        stride (int): Stride for rounding padding.
+        center (bool): Whether to center the image or align to top-left.
+
+    Methods:
+        __call__: Resize and pad image, update labels and bounding boxes.
+
+    Examples:
+        >>> transform = LetterBox(new_shape=(640, 640))
+        >>> result = transform(labels)
+        >>> resized_img = result['img']
+        >>> updated_instances = result['instances']
+    """
 
     def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, center=True, stride=32):
-        """Initialize LetterBox object with specific parameters."""
+        """
+        Initialize LetterBox object for resizing and padding images.
+
+        This class is designed to resize and pad images for object detection, instance segmentation, and pose estimation
+        tasks. It supports various resizing modes including auto-sizing, scale-fill, and letterboxing.
+
+        Args:
+            new_shape (Tuple[int, int]): Target size (height, width) for the resized image.
+            auto (bool): If True, use minimum rectangle to resize. If False, use new_shape directly.
+            scaleFill (bool): If True, stretch the image to new_shape without padding.
+            scaleup (bool): If True, allow scaling up. If False, only scale down.
+            center (bool): If True, center the placed image. If False, place image in top-left corner.
+            stride (int): Stride of the model (e.g., 32 for YOLO models).
+
+        Attributes:
+            new_shape (Tuple[int, int]): Target size for the resized image.
+            auto (bool): Flag for using minimum rectangle resizing.
+            scaleFill (bool): Flag for stretching image without padding.
+            scaleup (bool): Flag for allowing upscaling.
+            stride (int): Stride value for ensuring image size is divisible by stride.
+
+        Examples:
+            >>> letterbox = LetterBox(new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32)
+            >>> resized_img = letterbox(image=original_img)
+        """
         self.new_shape = new_shape
         self.auto = auto
         self.scaleFill = scaleFill
@@ -738,7 +1535,27 @@ class LetterBox:
         self.center = center  # Put the image in the middle or top-left
 
     def __call__(self, labels=None, image=None):
-        """Return updated labels and image with added border."""
+        """
+        Resizes and pads an image for object detection, instance segmentation, or pose estimation tasks.
+
+        This method applies letterboxing to the input image, which involves resizing the image while maintaining its
+        aspect ratio and adding padding to fit the new shape. It also updates any associated labels accordingly.
+
+        Args:
+            labels (dict | None): A dictionary containing image data and associated labels. If None, an empty dict is used.
+            image (numpy.ndarray | None): The input image as a numpy array. If None, the image is taken from 'labels'.
+
+        Returns:
+            (dict | numpy.ndarray): If 'labels' is provided and non-empty, returns the updated dictionary with the
+                resized and padded image, updated instances, and 'resized_shape' metadata. If 'labels' is empty,
+                returns only the resized and padded image.
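+
+        Notes:
+            As a sketch of the resizing math, the scale ratio is r = min(new_h / h, new_w / w), capped at 1.0
+            when scaleup is False; the remaining space is filled with gray (114) padding, split evenly between
+            opposite sides when center is True.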
+ + Examples: + >>> letterbox = LetterBox(new_shape=(640, 640)) + >>> result = letterbox(labels={'img': np.zeros((480, 640, 3)), 'instances': Instances(...)}) + >>> resized_img = result['img'] + >>> updated_instances = result['instances'] + """ if labels is None: labels = {} img = labels.get("img") if image is None else image @@ -786,7 +1603,28 @@ class LetterBox: return img def _update_labels(self, labels, ratio, padw, padh): - """Update labels.""" + """ + Updates labels after applying letterboxing to an image. + + This method modifies the bounding box coordinates of instances in the labels + to account for resizing and padding applied during letterboxing. + + Args: + labels (Dict): A dictionary containing image labels and instances. + ratio (Tuple[float, float]): Scaling ratios (width, height) applied to the image. + padw (float): Padding width added to the image. + padh (float): Padding height added to the image. + + Returns: + (Dict): Updated labels dictionary with modified instance coordinates. + + Examples: + >>> letterbox = LetterBox(new_shape=(640, 640)) + >>> labels = {'instances': Instances(...)} + >>> ratio = (0.5, 0.5) + >>> padw, padh = 10, 20 + >>> updated_labels = letterbox._update_labels(labels, ratio, padw, padh) + """ labels["instances"].convert_bbox(format="xyxy") labels["instances"].denormalize(*labels["img"].shape[:2][::-1]) labels["instances"].scale(*ratio) @@ -796,36 +1634,60 @@ class LetterBox: class CopyPaste: """ - Implements the Copy-Paste augmentation as described in the paper https://arxiv.org/abs/2012.07177. This class is - responsible for applying the Copy-Paste augmentation on images and their corresponding instances. + Implements Copy-Paste augmentation as described in https://arxiv.org/abs/2012.07177. + + This class applies Copy-Paste augmentation on images and their corresponding instances. + + Attributes: + p (float): Probability of applying the Copy-Paste augmentation. Must be between 0 and 1. + + Methods: + __call__: Applies Copy-Paste augmentation to given image and instances. + + Examples: + >>> copypaste = CopyPaste(p=0.5) + >>> augmented_labels = copypaste(labels) + >>> augmented_image = augmented_labels['img'] """ def __init__(self, p=0.5) -> None: """ - Initializes the CopyPaste class with a given probability. + Initializes the CopyPaste augmentation object. + + This class implements the Copy-Paste augmentation as described in the paper "Simple Copy-Paste is a Strong Data + Augmentation Method for Instance Segmentation" (https://arxiv.org/abs/2012.07177). It applies the Copy-Paste + augmentation on images and their corresponding instances with a given probability. Args: - p (float, optional): The probability of applying the Copy-Paste augmentation. Must be between 0 and 1. - Default is 0.5. + p (float): The probability of applying the Copy-Paste augmentation. Must be between 0 and 1. + + Attributes: + p (float): Stores the probability of applying the augmentation. + + Examples: + >>> augment = CopyPaste(p=0.7) + >>> augmented_data = augment(original_data) """ self.p = p def __call__(self, labels): """ - Applies the Copy-Paste augmentation to the given image and instances. + Applies Copy-Paste augmentation to an image and its instances. Args: labels (dict): A dictionary containing: - - 'img': The image to augment. - - 'cls': Class labels associated with the instances. - - 'instances': Object containing bounding boxes, and optionally, keypoints and segments. + - 'img' (numpy.ndarray): The image to augment. 
+                - 'cls' (numpy.ndarray): Class labels for the instances.
+                - 'instances' (ultralytics.utils.instance.Instances): Object containing bounding boxes, segments, etc.
 
         Returns:
-            (dict): Dict with augmented image and updated instances under the 'img', 'cls', and 'instances' keys.
+            (dict): Dictionary with augmented image and updated instances under 'img', 'cls', and 'instances' keys.
 
-        Notes:
-            1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work.
-            2. This method modifies the input dictionary 'labels' in place.
+        Examples:
+            >>> labels = {'img': np.random.rand(640, 640, 3), 'cls': np.array([0, 1, 2]),
+            ...           'instances': Instances(...)}
+            >>> augmenter = CopyPaste(p=0.5)
+            >>> augmented_labels = augmenter(labels)
         """
         im = labels["img"]
         cls = labels["cls"]
@@ -862,15 +1724,61 @@ class CopyPaste:
 
 class Albumentations:
    """
-    Albumentations transformations.
+    Albumentations transformations for image augmentation.
+
+    This class applies various image transformations using the Albumentations library. It includes operations such as
+    Blur, Median Blur, conversion to grayscale, Contrast Limited Adaptive Histogram Equalization (CLAHE), random changes
+    in brightness and contrast, RandomGamma, and image quality reduction through compression.
+
+    Attributes:
+        p (float): Probability of applying the transformations.
+        transform (albumentations.Compose): Composed Albumentations transforms.
+        contains_spatial (bool): Indicates if the transforms include spatial operations.
+
+    Methods:
+        __call__: Applies the Albumentations transformations to the input labels.
 
-    Optional, uninstall package to disable. Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive
-    Histogram Equalization, random change of brightness and contrast, RandomGamma and lowering of image quality by
-    compression.
+    Examples:
+        >>> transform = Albumentations(p=0.5)
+        >>> augmented_labels = transform(labels)
+
+    Notes:
+        - The Albumentations package must be installed to use this class.
+        - If the package is not installed or an error occurs during initialization, the transform will be set to None.
+        - Spatial transforms are handled differently and require special processing for bounding boxes.
    """
 
     def __init__(self, p=1.0):
-        """Initialize the transform object for YOLO bbox formatted params."""
+        """
+        Initialize the Albumentations transform object for YOLO bbox formatted parameters.
+
+        This class applies various image augmentations using the Albumentations library, including Blur, Median Blur,
+        conversion to grayscale, Contrast Limited Adaptive Histogram Equalization, random changes of brightness and
+        contrast, RandomGamma, and image quality reduction through compression.
+
+        Args:
+            p (float): Probability of applying the augmentations. Must be between 0 and 1.
+
+        Attributes:
+            p (float): Probability of applying the augmentations.
+            transform (albumentations.Compose): Composed Albumentations transforms.
+            contains_spatial (bool): Indicates if the transforms include spatial transformations.
+
+        Examples:
+            >>> transform = Albumentations(p=0.5)
+            >>> augmented_labels = transform(labels)
+            >>> augmented_image = augmented_labels['img']
+
+        Notes:
+            - Import and initialization errors are caught and logged rather than raised, leaving self.transform
+              as None so the augmentation is skipped.
+            - Requires Albumentations version 1.0.3 or higher.
+ - Spatial transforms are handled differently to ensure bbox compatibility. + - Some transforms are applied with very low probability (0.01) by default. + """ self.p = p self.transform = None prefix = colorstr("albumentations: ") @@ -949,7 +1857,32 @@ class Albumentations: LOGGER.info(f"{prefix}{e}") def __call__(self, labels): - """Generates object detections and returns a dictionary with detection results.""" + """ + Applies Albumentations transformations to input labels. + + This method applies a series of image augmentations using the Albumentations library. It can perform both + spatial and non-spatial transformations on the input image and its corresponding labels. + + Args: + labels (Dict): A dictionary containing image data and annotations. Expected keys are: + - 'img': numpy.ndarray representing the image + - 'cls': numpy.ndarray of class labels + - 'instances': object containing bounding boxes and other instance information + + Returns: + (Dict): The input dictionary with augmented image and updated annotations. + + Examples: + >>> transform = Albumentations(p=0.5) + >>> augmented = transform({"img": np.random.rand(640, 640, 3), "cls": np.array([0, 1]), + ... "instances": Instances(bboxes=np.array([[0, 0, 1, 1], [0.5, 0.5, 0.8, 0.8]]))}) + >>> assert augmented["img"].shape == (640, 640, 3) + + Notes: + - The method applies transformations with probability self.p. + - Spatial transforms update bounding boxes, while non-spatial transforms only modify the image. + - Requires the Albumentations library to be installed. + """ if self.transform is None or random.random() > self.p: return labels @@ -975,18 +1908,32 @@ class Albumentations: class Format: """ - Formats image annotations for object detection, instance segmentation, and pose estimation tasks. The class - standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader. + A class for formatting image annotations for object detection, instance segmentation, and pose estimation tasks. + + This class standardizes image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader. Attributes: - bbox_format (str): Format for bounding boxes. Default is 'xywh'. - normalize (bool): Whether to normalize bounding boxes. Default is True. - return_mask (bool): Return instance masks for segmentation. Default is False. - return_keypoint (bool): Return keypoints for pose estimation. Default is False. - mask_ratio (int): Downsample ratio for masks. Default is 4. - mask_overlap (bool): Whether to overlap masks. Default is True. - batch_idx (bool): Keep batch indexes. Default is True. - bgr (float): The probability to return BGR images. Default is 0.0. + bbox_format (str): Format for bounding boxes. Options are 'xywh' or 'xyxy'. + normalize (bool): Whether to normalize bounding boxes. + return_mask (bool): Whether to return instance masks for segmentation. + return_keypoint (bool): Whether to return keypoints for pose estimation. + return_obb (bool): Whether to return oriented bounding boxes. + mask_ratio (int): Downsample ratio for masks. + mask_overlap (bool): Whether to overlap masks. + batch_idx (bool): Whether to keep batch indexes. + bgr (float): The probability to return BGR images. + + Methods: + __call__: Formats labels dictionary with image, classes, bounding boxes, and optionally masks and keypoints. + _format_img: Converts image from Numpy array to PyTorch tensor. + _format_segments: Converts polygon points to bitmap masks. 
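+
+    Notes:
+        With mask_overlap=True, all instance masks are merged into a single array in which pixel values index
+        the instances sorted by area; with mask_overlap=False, each instance keeps its own binary mask channel.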
+ + Examples: + >>> formatter = Format(bbox_format='xywh', normalize=True, return_mask=True) + >>> formatted_labels = formatter(labels) + >>> img = formatted_labels['img'] + >>> bboxes = formatted_labels['bboxes'] + >>> masks = formatted_labels['masks'] """ def __init__( @@ -1001,7 +1948,38 @@ class Format: batch_idx=True, bgr=0.0, ): - """Initializes the Format class with given parameters.""" + """ + Initializes the Format class with given parameters for image and instance annotation formatting. + + This class standardizes image and instance annotations for object detection, instance segmentation, and pose + estimation tasks, preparing them for use in PyTorch DataLoader's `collate_fn`. + + Args: + bbox_format (str): Format for bounding boxes. Options are 'xywh', 'xyxy', etc. + normalize (bool): Whether to normalize bounding boxes to [0,1]. + return_mask (bool): If True, returns instance masks for segmentation tasks. + return_keypoint (bool): If True, returns keypoints for pose estimation tasks. + return_obb (bool): If True, returns oriented bounding boxes. + mask_ratio (int): Downsample ratio for masks. + mask_overlap (bool): If True, allows mask overlap. + batch_idx (bool): If True, keeps batch indexes. + bgr (float): Probability of returning BGR images instead of RGB. + + Attributes: + bbox_format (str): Format for bounding boxes. + normalize (bool): Whether bounding boxes are normalized. + return_mask (bool): Whether to return instance masks. + return_keypoint (bool): Whether to return keypoints. + return_obb (bool): Whether to return oriented bounding boxes. + mask_ratio (int): Downsample ratio for masks. + mask_overlap (bool): Whether masks can overlap. + batch_idx (bool): Whether to keep batch indexes. + + Examples: + >>> format = Format(bbox_format='xyxy', return_mask=True, return_keypoint=False) + >>> print(format.bbox_format) + xyxy + """ self.bbox_format = bbox_format self.normalize = normalize self.return_mask = return_mask # set False when training detection only @@ -1013,7 +1991,34 @@ class Format: self.bgr = bgr def __call__(self, labels): - """Return formatted image, classes, bounding boxes & keypoints to be used by 'collate_fn'.""" + """ + Formats image annotations for object detection, instance segmentation, and pose estimation tasks. + + This method standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader. + It processes the input labels dictionary, converting annotations to the specified format and applying + normalization if required. + + Args: + labels (Dict): A dictionary containing image and annotation data with the following keys: + - 'img': The input image as a numpy array. + - 'cls': Class labels for instances. + - 'instances': An Instances object containing bounding boxes, segments, and keypoints. + + Returns: + (Dict): A dictionary with formatted data, including: + - 'img': Formatted image tensor. + - 'cls': Class labels tensor. + - 'bboxes': Bounding boxes tensor in the specified format. + - 'masks': Instance masks tensor (if return_mask is True). + - 'keypoints': Keypoints tensor (if return_keypoint is True). + - 'batch_idx': Batch index tensor (if batch_idx is True). 
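+
+        Notes:
+            If return_mask is True and the image contains no instances, a zero mask with shape
+            (1 if mask_overlap else 0, H // mask_ratio, W // mask_ratio) is still attached so that batches
+            keep a consistent set of keys.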
+ + Examples: + >>> formatter = Format(bbox_format='xywh', normalize=True, return_mask=True) + >>> labels = {'img': np.random.rand(640, 640, 3), 'cls': np.array([0, 1]), 'instances': Instances(...)} + >>> formatted_labels = formatter(labels) + >>> print(formatted_labels.keys()) + """ img = labels.pop("img") h, w = img.shape[:2] cls = labels.pop("cls") @@ -1053,7 +2058,29 @@ class Format: return labels def _format_img(self, img): - """Format the image for YOLO from Numpy array to PyTorch tensor.""" + """ + Formats an image for YOLO from a Numpy array to a PyTorch tensor. + + This function performs the following operations: + 1. Ensures the image has 3 dimensions (adds a channel dimension if needed). + 2. Transposes the image from HWC to CHW format. + 3. Optionally flips the color channels from RGB to BGR. + 4. Converts the image to a contiguous array. + 5. Converts the Numpy array to a PyTorch tensor. + + Args: + img (ndarray): Input image as a Numpy array with shape (H, W, C) or (H, W). + + Returns: + (torch.Tensor): Formatted image as a PyTorch tensor with shape (C, H, W). + + Examples: + >>> import numpy as np + >>> img = np.random.rand(100, 100, 3) + >>> formatted_img = self._format_img(img) + >>> print(formatted_img.shape) + torch.Size([3, 100, 100]) + """ if len(img.shape) < 3: img = np.expand_dims(img, -1) img = img.transpose(2, 0, 1) @@ -1062,7 +2089,26 @@ class Format: return img def _format_segments(self, instances, cls, w, h): - """Convert polygon points to bitmap.""" + """ + Converts polygon segments to bitmap masks. + + Args: + instances (Instances): Object containing segment information. + cls (numpy.ndarray): Class labels for each instance. + w (int): Width of the image. + h (int): Height of the image. + + Returns: + (tuple): Tuple containing: + masks (numpy.ndarray): Bitmap masks with shape (N, H, W) or (1, H, W) if mask_overlap is True. + instances (Instances): Updated instances object with sorted segments if mask_overlap is True. + cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True. + + Notes: + - If self.mask_overlap is True, masks are overlapped and sorted by area. + - If self.mask_overlap is False, each mask is represented separately. + - Masks are downsampled according to self.mask_ratio. + """ segments = instances.segments if self.mask_overlap: masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio) @@ -1077,14 +2123,28 @@ class Format: class RandomLoadText: """ - Randomly sample positive texts and negative texts and update the class indices accordingly to the number of samples. + Randomly samples positive and negative texts and updates class indices accordingly. + + This class is responsible for sampling texts from a given set of class texts, including both positive + (present in the image) and negative (not present in the image) samples. It updates the class indices + to reflect the sampled texts and can optionally pad the text list to a fixed length. Attributes: - prompt_format (str): Format for prompt. Default is '{}'. - neg_samples (tuple[int]): A ranger to randomly sample negative texts, Default is (80, 80). - max_samples (int): The max number of different text samples in one image, Default is 80. + prompt_format (str): Format string for text prompts. Default is '{}'. + neg_samples (Tuple[int, int]): Range for randomly sampling negative texts. Default is (80, 80). + max_samples (int): Maximum number of different text samples in one image. Default is 80. 
         padding (bool): Whether to pad texts to max_samples. Default is False.
-        padding_value (str): The padding text. Default is "".
+        padding_value (str): The text used for padding when padding is True. Default is "".
+
+    Methods:
+        __call__: Processes the input labels and returns updated classes and texts.
+
+    Examples:
+        >>> loader = RandomLoadText(prompt_format="Object: {}", neg_samples=(2, 2), max_samples=20)
+        >>> labels = {"cls": np.array([[0], [1], [2]]), "texts": [["cat"], ["dog"], ["bird"], ["elephant"], ["car"]]}
+        >>> updated_labels = loader(labels)
+        >>> print(updated_labels["texts"])  # three positives plus two sampled negatives, for example:
+        ['Object: cat', 'Object: dog', 'Object: bird', 'Object: elephant', 'Object: car']
     """
 
     def __init__(
@@ -1095,7 +2155,39 @@
         padding: bool = False,
         padding_value: str = "",
     ) -> None:
-        """Initializes the RandomLoadText class with given parameters."""
+        """
+        Initializes the RandomLoadText class for randomly sampling positive and negative texts.
+
+        This class is designed to randomly sample positive texts and negative texts, and update the class
+        indices accordingly to the number of samples. It can be used for text-based object detection tasks.
+
+        Args:
+            prompt_format (str): Format string for the prompt. Default is '{}'. The format string should
+                contain a single pair of curly braces {} where the text will be inserted.
+            neg_samples (Tuple[int, int]): A range to randomly sample negative texts. The first integer
+                specifies the minimum number of negative samples, and the second integer specifies the
+                maximum. Default is (80, 80).
+            max_samples (int): The maximum number of different text samples in one image. Default is 80.
+            padding (bool): Whether to pad texts to max_samples. If True, the number of texts will always
+                be equal to max_samples. Default is False.
+            padding_value (str): The padding text to use when padding is True. Default is an empty string.
+
+        Attributes:
+            prompt_format (str): The format string for the prompt.
+            neg_samples (Tuple[int, int]): The range for sampling negative texts.
+            max_samples (int): The maximum number of text samples.
+            padding (bool): Whether padding is enabled.
+            padding_value (str): The value used for padding.
+
+        Examples:
+            >>> random_load_text = RandomLoadText(prompt_format="Object: {}", neg_samples=(50, 100), max_samples=120)
+            >>> random_load_text.prompt_format
+            'Object: {}'
+            >>> random_load_text.neg_samples
+            (50, 100)
+            >>> random_load_text.max_samples
+            120
+        """
         self.prompt_format = prompt_format
         self.neg_samples = neg_samples
         self.max_samples = max_samples
@@ -1103,7 +2195,24 @@
         self.padding = padding
         self.padding_value = padding_value
 
     def __call__(self, labels: dict) -> dict:
-        """Return updated classes and texts."""
+        """
+        Randomly samples positive and negative texts and updates class indices accordingly.
+
+        This method samples positive texts based on the existing class labels in the image, and randomly
+        selects negative texts from the remaining classes. It then updates the class indices to match the
+        new sampled text order.
+
+        Args:
+            labels (Dict): A dictionary containing image labels and metadata. Must include 'texts' and 'cls' keys.
+
+        Returns:
+            (Dict): Updated labels dictionary with new 'cls' and 'texts' entries.
+
+        Examples:
+            >>> loader = RandomLoadText(prompt_format="A photo of {}", neg_samples=(5, 10), max_samples=20)
+            >>> labels = {"cls": np.array([[0], [1], [2]]), "texts": [["dog"], ["cat"], ["bird"]]}
+            >>> updated_labels = loader(labels)
+        """
         assert "texts" in labels, "No texts found in labels."
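+        # Positive texts come from the classes present in this image; negative texts are then sampled from
+        # the remaining classes within the neg_samples range, and 'cls' indices are remapped to match the
+        # order of the final sampled text list.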
         class_texts = labels["texts"]
         num_classes = len(class_texts)
@@ -1150,7 +2259,28 @@
 
 
 def v8_transforms(dataset, imgsz, hyp, stretch=False):
-    """Convert images to a size suitable for YOLOv8 training."""
+    """
+    Applies a series of image transformations for YOLOv8 training.
+
+    This function creates a composition of image augmentation techniques to prepare images for YOLOv8 training.
+    It includes operations such as mosaic, copy-paste, random perspective, mixup, and various color adjustments.
+
+    Args:
+        dataset (Dataset): The dataset object containing image data and annotations.
+        imgsz (int): The target image size for resizing.
+        hyp (IterableSimpleNamespace): Hyperparameters controlling various aspects of the transformations,
+            accessed as attributes (e.g., hyp.mosaic, hyp.copy_paste).
+        stretch (bool): If True, applies stretching to the image. If False, uses LetterBox resizing.
+
+    Returns:
+        (Compose): A composition of image transformations to be applied to the dataset.
+
+    Examples:
+        >>> from ultralytics.data.dataset import YOLODataset
+        >>> from ultralytics.utils import IterableSimpleNamespace
+        >>> dataset = YOLODataset(img_path='path/to/images', imgsz=640)
+        >>> hyp = IterableSimpleNamespace(mosaic=1.0, copy_paste=0.5, degrees=10.0, translate=0.2, scale=0.9)
+        >>> transforms = v8_transforms(dataset, imgsz=640, hyp=hyp)
+        >>> augmented_data = transforms(dataset[0])
+    """
     pre_transform = Compose(
         [
             Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic),
@@ -1195,17 +2325,30 @@
     crop_fraction: float = DEFAULT_CROP_FRACTION,
 ):
     """
-    Classification transforms for evaluation/inference. Inspired by timm/data/transforms_factory.py.
+    Creates a composition of image transforms for classification tasks.
+
+    This function generates a sequence of torchvision transforms suitable for preprocessing images
+    for classification models during evaluation or inference. The transforms include resizing,
+    center cropping, conversion to tensor, and normalization.
 
     Args:
-        size (int): image size
-        mean (tuple): mean values of RGB channels
-        std (tuple): std values of RGB channels
-        interpolation (T.InterpolationMode): interpolation mode. default is T.InterpolationMode.BILINEAR.
-        crop_fraction (float): fraction of image to crop. default is 1.0.
+        size (int | tuple): The target size for the transformed image. If an int, it defines the
+            shortest edge. If a tuple, it defines (height, width).
+        mean (tuple): Mean values for each RGB channel used in normalization. Defaults to
+            DEFAULT_MEAN.
+        std (tuple): Standard deviation values for each RGB channel used in normalization.
+            Defaults to DEFAULT_STD.
+        interpolation (int): Interpolation method for resizing. Defaults to Image.BILINEAR.
+        crop_fraction (float): Fraction of the image to be cropped. Defaults to
+            DEFAULT_CROP_FRACTION.
 
     Returns:
-        (T.Compose): torchvision transforms
+        (torchvision.transforms.Compose): A composition of torchvision transforms.
+
+    Examples:
+        >>> transforms = classify_transforms(size=224)
+        >>> img = Image.open('path/to/image.jpg')
+        >>> transformed_img = transforms(img)
     """
     import torchvision.transforms as T  # scope for faster 'import ultralytics'
 
@@ -1251,26 +2394,33 @@
     interpolation=Image.BILINEAR,
 ):
     """
-    Classification transforms with augmentation for training. Inspired by timm/data/transforms_factory.py.
+    Creates a composition of image augmentation transforms for classification tasks.
+
+    This function generates a set of image transformations suitable for training classification models. It includes
+    options for resizing, flipping, color jittering, auto augmentation, and random erasing.
 
     Args:
-        size (int): image size
-        scale (tuple): scale range of the image. default is (0.08, 1.0)
-        ratio (tuple): aspect ratio range of the image. default is (3./4., 4./3.)
-        mean (tuple): mean values of RGB channels
-        std (tuple): std values of RGB channels
-        hflip (float): probability of horizontal flip
-        vflip (float): probability of vertical flip
-        auto_augment (str): auto augmentation policy. can be 'randaugment', 'augmix', 'autoaugment' or None.
-        hsv_h (float): image HSV-Hue augmentation (fraction)
-        hsv_s (float): image HSV-Saturation augmentation (fraction)
-        hsv_v (float): image HSV-Value augmentation (fraction)
-        force_color_jitter (bool): force to apply color jitter even if auto augment is enabled
-        erasing (float): probability of random erasing
-        interpolation (T.InterpolationMode): interpolation mode. default is T.InterpolationMode.BILINEAR.
+        size (int): Target size for the image after transformations.
+        mean (tuple): Mean values for normalization, one per channel.
+        std (tuple): Standard deviation values for normalization, one per channel.
+        scale (tuple | None): Lower and upper bounds for the area of the random crop, relative to the original image.
+        ratio (tuple | None): Lower and upper bounds for the aspect ratio of the random crop.
+        hflip (float): Probability of horizontal flip.
+        vflip (float): Probability of vertical flip.
+        auto_augment (str | None): Auto augmentation policy. Can be 'randaugment', 'augmix', 'autoaugment' or None.
+        hsv_h (float): Image HSV-Hue augmentation factor.
+        hsv_s (float): Image HSV-Saturation augmentation factor.
+        hsv_v (float): Image HSV-Value augmentation factor.
+        force_color_jitter (bool): Whether to apply color jitter even if auto augment is enabled.
+        erasing (float): Probability of random erasing.
+        interpolation (int): Interpolation method.

     Returns:
-        (T.Compose): torchvision transforms
+        (torchvision.transforms.Compose): A composition of image augmentation transforms.
+
+    Examples:
+        >>> transforms = classify_augmentations(size=224, auto_augment='randaugment')
+        >>> augmented_image = transforms(original_image)
     """
     # Transforms to apply if Albumentations not installed
     import torchvision.transforms as T  # scope for faster 'import ultralytics'
@@ -1332,24 +2482,53 @@ def classify_augmentations(
 # NOTE: keep this class for backward compatibility
 class ClassifyLetterBox:
     """
-    YOLOv8 LetterBox class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
-    T.Compose([LetterBox(size), ToTensor()]).
+    YOLOv8 LetterBox class for image preprocessing in classification tasks.
+
+    This class is designed to be part of a transformation pipeline, e.g., T.Compose([LetterBox(size), ToTensor()]).
+    It resizes and pads images to a specified size while maintaining the original aspect ratio.

     Attributes:
         h (int): Target height of the image.
         w (int): Target width of the image.
-        auto (bool): If True, automatically solves for short side using stride.
+        auto (bool): If True, automatically calculates the short side using stride.
         stride (int): The stride value, used when 'auto' is True.
+
+    Methods:
+        __call__: Applies the letterbox transformation to an input image.
+
+    Examples:
+        >>> transform = ClassifyLetterBox(size=(640, 640), auto=False, stride=32)
+        >>> img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
+        >>> result = transform(img)
+        >>> print(result.shape)
+        (640, 640, 3)
     """

     def __init__(self, size=(640, 640), auto=False, stride=32):
         """
-        Initializes the ClassifyLetterBox class with a target size, auto-flag, and stride.
+ Initializes the ClassifyLetterBox object for image preprocessing. + + This class is designed to be part of a transformation pipeline for image classification tasks. It resizes and + pads images to a specified size while maintaining the original aspect ratio. Args: - size (Union[int, Tuple[int, int]]): The target dimensions (height, width) for the letterbox. - auto (bool): If True, automatically calculates the short side based on stride. - stride (int): The stride value, used when 'auto' is True. + size (Union[int, Tuple[int, int]]): Target size for the letterboxed image. If int, a square image of + (size, size) is created. If tuple, it should be (height, width). + auto (bool): If True, automatically calculates the short side based on stride. Default is False. + stride (int): The stride value, used when 'auto' is True. Default is 32. + + Attributes: + h (int): Target height of the letterboxed image. + w (int): Target width of the letterboxed image. + auto (bool): Flag indicating whether to automatically calculate short side. + stride (int): Stride value for automatic short side calculation. + + Examples: + >>> transform = ClassifyLetterBox(size=224) + >>> img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8) + >>> result = transform(img) + >>> print(result.shape) + (224, 224, 3) """ super().__init__() self.h, self.w = (size, size) if isinstance(size, int) else size @@ -1358,13 +2537,24 @@ class ClassifyLetterBox: def __call__(self, im): """ - Resizes the image and pads it with a letterbox method. + Resizes and pads an image using the letterbox method. + + This method resizes the input image to fit within the specified dimensions while maintaining its aspect ratio, + then pads the resized image to match the target size. Args: - im (numpy.ndarray): The input image as a numpy array of shape HWC. + im (numpy.ndarray): Input image as a numpy array with shape (H, W, C). Returns: - (numpy.ndarray): The letterboxed and resized image as a numpy array. + (numpy.ndarray): Resized and padded image as a numpy array with shape (hs, ws, 3), where hs and ws are + the target height and width respectively. + + Examples: + >>> letterbox = ClassifyLetterBox(size=(640, 640)) + >>> image = np.random.randint(0, 255, (720, 1280, 3), dtype=np.uint8) + >>> resized_image = letterbox(image) + >>> print(resized_image.shape) + (640, 640, 3) """ imh, imw = im.shape[:2] r = min(self.h / imh, self.w / imw) # ratio of new/old dimensions @@ -1382,24 +2572,70 @@ class ClassifyLetterBox: # NOTE: keep this class for backward compatibility class CenterCrop: - """YOLOv8 CenterCrop class for image preprocessing, designed to be part of a transformation pipeline, e.g., - T.Compose([CenterCrop(size), ToTensor()]). + """ + Applies center cropping to images for classification tasks. + + This class performs center cropping on input images, resizing them to a specified size while maintaining the aspect + ratio. It is designed to be part of a transformation pipeline, e.g., T.Compose([CenterCrop(size), ToTensor()]). + + Attributes: + h (int): Target height of the cropped image. + w (int): Target width of the cropped image. + + Methods: + __call__: Applies the center crop transformation to an input image. 
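The ratio computation ClassifyLetterBox uses above extends naturally to a complete resize-and-pad helper. A standalone sketch; the 114 fill value is an assumption based on the usual YOLO padding convention, not taken from this diff:

```python
# Standalone letterbox sketch mirroring the ratio math shown above.
import cv2
import numpy as np


def letterbox(im: np.ndarray, h: int, w: int) -> np.ndarray:
    imh, imw = im.shape[:2]
    r = min(h / imh, w / imw)                      # ratio of new/old dimensions
    nh, nw = round(imh * r), round(imw * r)        # resized, unpadded size
    top, left = (h - nh) // 2, (w - nw) // 2       # offsets that center the image
    out = np.full((h, w, 3), 114, dtype=im.dtype)  # padded canvas (fill value assumed)
    out[top : top + nh, left : left + nw] = cv2.resize(im, (nw, nh))
    return out


print(letterbox(np.zeros((480, 640, 3), np.uint8), 640, 640).shape)  # (640, 640, 3)
```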
+ + Examples: + >>> transform = CenterCrop(640) + >>> image = np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8) + >>> cropped_image = transform(image) + >>> print(cropped_image.shape) + (640, 640, 3) """ def __init__(self, size=640): - """Converts an image from numpy array to PyTorch tensor.""" + """ + Initializes the CenterCrop object for image preprocessing. + + This class is designed to be part of a transformation pipeline, e.g., T.Compose([CenterCrop(size), ToTensor()]). + It performs a center crop on input images to a specified size. + + Args: + size (int | Tuple[int, int]): The desired output size of the crop. If size is an int, a square crop + (size, size) is made. If size is a sequence like (h, w), it is used as the output size. + + Returns: + (None): This method initializes the object and does not return anything. + + Examples: + >>> transform = CenterCrop(224) + >>> img = np.random.rand(300, 300, 3) + >>> cropped_img = transform(img) + >>> print(cropped_img.shape) + (224, 224, 3) + """ super().__init__() self.h, self.w = (size, size) if isinstance(size, int) else size def __call__(self, im): """ - Resizes and crops the center of the image using a letterbox method. + Applies center cropping to an input image. + + This method resizes and crops the center of the image using a letterbox method. It maintains the aspect + ratio of the original image while fitting it into the specified dimensions. Args: - im (numpy.ndarray): The input image as a numpy array of shape HWC. + im (numpy.ndarray | PIL.Image.Image): The input image as a numpy array of shape (H, W, C) or a + PIL Image object. Returns: - (numpy.ndarray): The center-cropped and resized image as a numpy array. + (numpy.ndarray): The center-cropped and resized image as a numpy array of shape (self.h, self.w, C). + + Examples: + >>> transform = CenterCrop(size=224) + >>> image = np.random.randint(0, 255, (640, 480, 3), dtype=np.uint8) + >>> cropped_image = transform(image) + >>> assert cropped_image.shape == (224, 224, 3) """ if isinstance(im, Image.Image): # convert from PIL to numpy array if required im = np.asarray(im) @@ -1411,22 +2647,71 @@ class CenterCrop: # NOTE: keep this class for backward compatibility class ToTensor: - """YOLOv8 ToTensor class for image preprocessing, i.e., T.Compose([LetterBox(size), ToTensor()]).""" + """ + Converts an image from a numpy array to a PyTorch tensor. + + This class is designed to be part of a transformation pipeline, e.g., T.Compose([LetterBox(size), ToTensor()]). + + Attributes: + half (bool): If True, converts the image to half precision (float16). + + Methods: + __call__: Applies the tensor conversion to an input image. + + Examples: + >>> transform = ToTensor(half=True) + >>> img = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + >>> tensor_img = transform(img) + >>> print(tensor_img.shape, tensor_img.dtype) + torch.Size([3, 640, 640]) torch.float16 + + Notes: + The input image is expected to be in BGR format with shape (H, W, C). + The output tensor will be in RGB format with shape (C, H, W), normalized to [0, 1]. + """ def __init__(self, half=False): - """Initialize YOLOv8 ToTensor object with optional half-precision support.""" + """ + Initializes the ToTensor object for converting images to PyTorch tensors. + + This class is designed to be used as part of a transformation pipeline for image preprocessing in the + Ultralytics YOLO framework. It converts numpy arrays or PIL Images to PyTorch tensors, with an option + for half-precision (float16) conversion. 
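The conversion the ToTensor docstrings describe reduces to a transpose, a channel flip, and a scale. A self-contained sketch of that path, mirroring the transpose shown later in this hunk:

```python
# HWC BGR uint8 in -> CHW RGB float in [0, 1] out, as described above.
import numpy as np
import torch


def to_tensor(im: np.ndarray, half: bool = False) -> torch.Tensor:
    im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1])  # HWC->CHW, BGR->RGB
    t = torch.from_numpy(im)
    t = t.half() if half else t.float()
    return t / 255.0  # normalize to [0, 1]


img = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
print(to_tensor(img, half=True).shape, to_tensor(img, half=True).dtype)  # torch.Size([3, 640, 640]) torch.float16
```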
+ + Args: + half (bool): If True, converts the tensor to half precision (float16). Default is False. + + Examples: + >>> transform = ToTensor(half=True) + >>> img = np.random.rand(640, 640, 3) + >>> tensor_img = transform(img) + >>> print(tensor_img.dtype) + torch.float16 + """ super().__init__() self.half = half def __call__(self, im): """ - Transforms an image from a numpy array to a PyTorch tensor, applying optional half-precision and normalization. + Transforms an image from a numpy array to a PyTorch tensor. + + This method converts the input image from a numpy array to a PyTorch tensor, applying optional + half-precision conversion and normalization. The image is transposed from HWC to CHW format and + the color channels are reversed from BGR to RGB. Args: im (numpy.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order. Returns: - (torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized to [0, 1]. + (torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized + to [0, 1] with shape (C, H, W) in RGB order. + + Examples: + >>> transform = ToTensor(half=True) + >>> img = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + >>> tensor_img = transform(img) + >>> print(tensor_img.shape, tensor_img.dtype) + torch.Size([3, 640, 640]) torch.float16 """ im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous im = torch.from_numpy(im) # to torch diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index e626f8cabe..85ad8b4436 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -30,26 +30,18 @@ class Model(nn.Module): This class provides a common interface for various operations related to YOLO models, such as training, validation, prediction, exporting, and benchmarking. It handles different types of models, including those - loaded from local files, Ultralytics HUB, or Triton Server. The class is designed to be flexible and - extendable for different tasks and model configurations. - - Args: - model (Union[str, Path], optional): Path or name of the model to load or create. This can be a local file - path, a model name from Ultralytics HUB, or a Triton Server model. Defaults to 'yolov8n.pt'. - task (Any, optional): The task type associated with the YOLO model. This can be used to specify the model's - application domain, such as object detection, segmentation, etc. Defaults to None. - verbose (bool, optional): If True, enables verbose output during the model's operations. Defaults to False. + loaded from local files, Ultralytics HUB, or Triton Server. Attributes: - callbacks (dict): A dictionary of callback functions for various events during model operations. + callbacks (Dict): A dictionary of callback functions for various events during model operations. predictor (BasePredictor): The predictor object used for making predictions. model (nn.Module): The underlying PyTorch model. trainer (BaseTrainer): The trainer object used for training the model. - ckpt (dict): The checkpoint data if the model is loaded from a *.pt file. + ckpt (Dict): The checkpoint data if the model is loaded from a *.pt file. cfg (str): The configuration of the model if loaded from a *.yaml file. ckpt_path (str): The path to the checkpoint file. - overrides (dict): A dictionary of overrides for model configuration. - metrics (dict): The latest training/validation metrics. + overrides (Dict): A dictionary of overrides for model configuration. 
+ metrics (Dict): The latest training/validation metrics. session (HUBTrainingSession): The Ultralytics HUB session, if applicable. task (str): The type of task the model is intended for. model_name (str): The name of the model. @@ -75,19 +67,14 @@ class Model(nn.Module): add_callback: Adds a callback function for an event. clear_callback: Clears all callbacks for an event. reset_callbacks: Resets all callbacks to their default functions. - is_triton_model: Checks if a model is a Triton Server model. - is_hub_model: Checks if a model is an Ultralytics HUB model. - _reset_ckpt_args: Resets checkpoint arguments when loading a PyTorch model. - _smart_load: Loads the appropriate module based on the model task. - task_map: Provides a mapping from model tasks to corresponding classes. - - Raises: - FileNotFoundError: If the specified model file does not exist or is inaccessible. - ValueError: If the model file or configuration is invalid or unsupported. - ImportError: If required dependencies for specific model types (like HUB SDK) are not installed. - TypeError: If the model is not a PyTorch model when required. - AttributeError: If required attributes or methods are not implemented or available. - NotImplementedError: If a specific model task or mode is not supported. + + Examples: + >>> from ultralytics import YOLO + >>> model = YOLO('yolov8n.pt') + >>> results = model.predict('image.jpg') + >>> model.train(data='coco128.yaml', epochs=3) + >>> metrics = model.val() + >>> model.export(format='onnx') """ def __init__( @@ -99,22 +86,27 @@ class Model(nn.Module): """ Initializes a new instance of the YOLO model class. - This constructor sets up the model based on the provided model path or name. It handles various types of model - sources, including local files, Ultralytics HUB models, and Triton Server models. The method initializes several - important attributes of the model and prepares it for operations like training, prediction, or export. + This constructor sets up the model based on the provided model path or name. It handles various types of + model sources, including local files, Ultralytics HUB models, and Triton Server models. The method + initializes several important attributes of the model and prepares it for operations like training, + prediction, or export. Args: - model (Union[str, Path], optional): The path or model file to load or create. This can be a local - file path, a model name from Ultralytics HUB, or a Triton Server model. Defaults to 'yolov8n.pt'. - task (Any, optional): The task type associated with the YOLO model, specifying its application domain. - Defaults to None. - verbose (bool, optional): If True, enables verbose output during the model's initialization and subsequent - operations. Defaults to False. + model (Union[str, Path]): Path or name of the model to load or create. Can be a local file path, a + model name from Ultralytics HUB, or a Triton Server model. + task (str | None): The task type associated with the YOLO model, specifying its application domain. + verbose (bool): If True, enables verbose output during the model's initialization and subsequent + operations. Raises: FileNotFoundError: If the specified model file does not exist or is inaccessible. ValueError: If the model file or configuration is invalid or unsupported. ImportError: If required dependencies for specific model types (like HUB SDK) are not installed. 
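The constructor behavior described above amounts to a dispatch on the model string. A simplified, illustrative sketch; the patterns are assumptions distilled from the is_triton_model and is_hub_model checks documented below, not the actual constructor logic:

```python
# Speculative dispatch sketch; not the actual __init__ implementation.
from pathlib import Path


def classify_source(model: str) -> str:
    if model.startswith(("http://", "grpc://")):
        return "triton"  # Triton Server URL (scheme + endpoint)
    if model.startswith("https://hub.ultralytics.com/models/"):
        return "hub"  # Ultralytics HUB model
    if Path(model).suffix in {".pt", ".yaml", ".yml"}:
        return "local"  # local weights or YAML config
    return "unknown"


print(classify_source("yolov8n.pt"))  # local
```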
+
+        Examples:
+            >>> model = Model("yolov8n.pt")
+            >>> model = Model("path/to/model.yaml", task="detect")
+            >>> model = Model("hub_model", verbose=True)
         """
         super().__init__()
         self.callbacks = callbacks.get_default_callbacks()
@@ -155,27 +147,50 @@ class Model(nn.Module):
         **kwargs,
     ) -> list:
         """
-        An alias for the predict method, enabling the model instance to be callable.
+        Alias for the predict method, enabling the model instance to be callable for predictions.

-        This method simplifies the process of making predictions by allowing the model instance to be called directly
-        with the required arguments for prediction.
+        This method simplifies the process of making predictions by allowing the model instance to be called
+        directly with the required arguments.

         Args:
-            source (str | Path | int | PIL.Image | np.ndarray, optional): The source of the image for making
-                predictions. Accepts various types, including file paths, URLs, PIL images, and numpy arrays.
-                Defaults to None.
-            stream (bool, optional): If True, treats the input source as a continuous stream for predictions.
-                Defaults to False.
-            **kwargs (any): Additional keyword arguments for configuring the prediction process.
+            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | List | Tuple): The source of
+                the image(s) to make predictions on. Can be a file path, URL, PIL image, numpy array, PyTorch
+                tensor, or a list/tuple of these.
+            stream (bool): If True, treat the input source as a continuous stream for predictions.
+            **kwargs (Any): Additional keyword arguments to configure the prediction process.

         Returns:
-            (List[ultralytics.engine.results.Results]): A list of prediction results, encapsulated in the Results class.
+            (List[ultralytics.engine.results.Results]): A list of prediction results, each encapsulated in a
+                Results object.
+
+        Examples:
+            >>> model = YOLO('yolov8n.pt')
+            >>> results = model('https://ultralytics.com/images/bus.jpg')
+            >>> for r in results:
+            ...     print(f"Detected {len(r)} objects in image")
         """
         return self.predict(source, stream, **kwargs)

     @staticmethod
     def is_triton_model(model: str) -> bool:
-        """Is model a Triton Server URL string, i.e. <scheme>://<netloc>/<endpoint>/<task_name>"""
+        """
+        Checks if the given model string is a Triton Server URL.
+
+        This static method determines whether the provided model string represents a valid Triton Server URL by
+        parsing its components using urllib.parse.urlsplit().
+
+        Args:
+            model (str): The model string to be checked.
+
+        Returns:
+            (bool): True if the model string is a valid Triton Server URL, False otherwise.
+
+        Examples:
+            >>> Model.is_triton_model('http://localhost:8000/v2/models/yolov8n')
+            True
+            >>> Model.is_triton_model('yolov8n.pt')
+            False
+        """
         from urllib.parse import urlsplit

         url = urlsplit(model)
@@ -183,7 +198,30 @@ class Model(nn.Module):

     @staticmethod
     def is_hub_model(model: str) -> bool:
-        """Check if the provided model is a HUB model."""
+        """
+        Check if the provided model is an Ultralytics HUB model.
+
+        This static method determines whether the given model string represents a valid Ultralytics HUB model
+        identifier. It checks for three possible formats: a full HUB URL, an API key and model ID combination,
+        or a standalone model ID.
+
+        Args:
+            model (str): The model identifier to check. This can be a URL, an API key and model ID
+                combination, or a standalone model ID.
+
+        Returns:
+            (bool): True if the model is a valid Ultralytics HUB model, False otherwise.
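A standalone sketch of the three HUB identifier formats listed above. The exact length constants are assumptions for illustration; only the full-URL prefix is confirmed by this hunk:

```python
# Illustrative HUB-identifier check; length constants are assumed, not confirmed here.
from pathlib import Path

HUB_WEB_ROOT = "https://hub.ultralytics.com"


def looks_like_hub_model(model: str) -> bool:
    full_url = model.startswith(f"{HUB_WEB_ROOT}/models/")       # full HUB URL
    key_and_id = [len(x) for x in model.split("_")] == [42, 20]  # APIKEY_MODELID (assumed lengths)
    bare_id = len(model) == 20 and not Path(model).exists() and all(c not in model for c in "./\\")
    return any((full_url, key_and_id, bare_id))
```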
+ + Examples: + >>> Model.is_hub_model("https://hub.ultralytics.com/models/example_model") + True + >>> Model.is_hub_model("api_key_example_model_id") + True + >>> Model.is_hub_model("example_model_id") + True + >>> Model.is_hub_model("not_a_hub_model.pt") + False + """ return any( ( model.startswith(f"{HUB_WEB_ROOT}/models/"), # i.e. https://hub.ultralytics.com/models/MODEL_ID @@ -196,11 +234,24 @@ class Model(nn.Module): """ Initializes a new model and infers the task type from the model definitions. + This method creates a new model instance based on the provided configuration file. It loads the model + configuration, infers the task type if not specified, and initializes the model using the appropriate + class from the task map. + Args: - cfg (str): model configuration file - task (str | None): model task - model (BaseModel): Customized model. - verbose (bool): display model info on load + cfg (str): Path to the model configuration file in YAML format. + task (str | None): The specific task for the model. If None, it will be inferred from the config. + model (torch.nn.Module | None): A custom model instance. If provided, it will be used instead of creating + a new one. + verbose (bool): If True, displays model information during loading. + + Raises: + ValueError: If the configuration file is invalid or the task cannot be inferred. + ImportError: If the required dependencies for the specified task are not installed. + + Examples: + >>> model = Model() + >>> model._new('yolov8n.yaml', task='detect', verbose=True) """ cfg_dict = yaml_model_load(cfg) self.cfg = cfg @@ -216,11 +267,23 @@ class Model(nn.Module): def _load(self, weights: str, task=None) -> None: """ - Initializes a new model and infers the task type from the model head. + Loads a model from a checkpoint file or initializes it from a weights file. + + This method handles loading models from either .pt checkpoint files or other weight file formats. It sets + up the model, task, and related attributes based on the loaded weights. Args: - weights (str): model checkpoint to be loaded - task (str | None): model task + weights (str): Path to the model weights file to be loaded. + task (str | None): The task associated with the model. If None, it will be inferred from the model. + + Raises: + FileNotFoundError: If the specified weights file does not exist or is inaccessible. + ValueError: If the weights file format is unsupported or invalid. + + Examples: + >>> model = Model() + >>> model._load('yolov8n.pt') + >>> model._load('path/to/weights.pth', task='detect') """ if weights.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://")): weights = checks.check_file(weights) # automatically download and return local filename @@ -241,7 +304,22 @@ class Model(nn.Module): self.model_name = weights def _check_is_pytorch_model(self) -> None: - """Raises TypeError is model is not a PyTorch model.""" + """ + Checks if the model is a PyTorch model and raises a TypeError if it's not. + + This method verifies that the model is either a PyTorch module or a .pt file. It's used to ensure that + certain operations that require a PyTorch model are only performed on compatible model types. + + Raises: + TypeError: If the model is not a PyTorch module or a .pt file. The error message provides detailed + information about supported model formats and operations. 
+ + Examples: + >>> model = Model("yolov8n.pt") + >>> model._check_is_pytorch_model() # No error raised + >>> model = Model("yolov8n.onnx") + >>> model._check_is_pytorch_model() # Raises TypeError + """ pt_str = isinstance(self.model, (str, Path)) and Path(self.model).suffix == ".pt" pt_module = isinstance(self.model, nn.Module) if not (pt_module or pt_str): @@ -255,17 +333,21 @@ class Model(nn.Module): def reset_weights(self) -> "Model": """ - Resets the model parameters to randomly initialized values, effectively discarding all training information. + Resets the model's weights to their initial state. This method iterates through all modules in the model and resets their parameters if they have a - 'reset_parameters' method. It also ensures that all parameters have 'requires_grad' set to True, enabling them - to be updated during training. + 'reset_parameters' method. It also ensures that all parameters have 'requires_grad' set to True, + enabling them to be updated during training. Returns: - self (ultralytics.engine.model.Model): The instance of the class with reset weights. + (Model): The instance of the class with reset weights. Raises: AssertionError: If the model is not a PyTorch model. + + Examples: + >>> model = Model('yolov8n.pt') + >>> model.reset_weights() """ self._check_is_pytorch_model() for m in self.model.modules(): @@ -283,13 +365,18 @@ class Model(nn.Module): name and shape and transfers them to the model. Args: - weights (str | Path): Path to the weights file or a weights object. Defaults to 'yolov8n.pt'. + weights (Union[str, Path]): Path to the weights file or a weights object. Returns: - self (ultralytics.engine.model.Model): The instance of the class with loaded weights. + (Model): The instance of the class with loaded weights. Raises: AssertionError: If the model is not a PyTorch model. + + Examples: + >>> model = Model() + >>> model.load('yolov8n.pt') + >>> model.load(Path('path/to/weights.pt')) """ self._check_is_pytorch_model() if isinstance(weights, (str, Path)): @@ -301,14 +388,19 @@ class Model(nn.Module): """ Saves the current model state to a file. - This method exports the model's checkpoint (ckpt) to the specified filename. + This method exports the model's checkpoint (ckpt) to the specified filename. It includes metadata such as + the date, Ultralytics version, license information, and a link to the documentation. Args: - filename (str | Path): The name of the file to save the model to. Defaults to 'saved_model.pt'. - use_dill (bool): Whether to try using dill for serialization if available. Defaults to True. + filename (Union[str, Path]): The name of the file to save the model to. + use_dill (bool): Whether to try using dill for serialization if available. Raises: AssertionError: If the model is not a PyTorch model. + + Examples: + >>> model = Model('yolov8n.pt') + >>> model.save('my_model.pt') """ self._check_is_pytorch_model() from copy import deepcopy @@ -329,30 +421,47 @@ class Model(nn.Module): """ Logs or returns model information. - This method provides an overview or detailed information about the model, depending on the arguments passed. - It can control the verbosity of the output. + This method provides an overview or detailed information about the model, depending on the arguments + passed. It can control the verbosity of the output and return the information as a list. Args: - detailed (bool): If True, shows detailed information about the model. Defaults to False. - verbose (bool): If True, prints the information. 
If False, returns the information. Defaults to True. + detailed (bool): If True, shows detailed information about the model layers and parameters. + verbose (bool): If True, prints the information. If False, returns the information as a list. Returns: - (list): Various types of information about the model, depending on the 'detailed' and 'verbose' parameters. + (List[str]): A list of strings containing various types of information about the model, including + model summary, layer details, and parameter counts. Empty if verbose is True. Raises: - AssertionError: If the model is not a PyTorch model. + TypeError: If the model is not a PyTorch model. + + Examples: + >>> model = Model('yolov8n.pt') + >>> model.info() # Prints model summary + >>> info_list = model.info(detailed=True, verbose=False) # Returns detailed info as a list """ self._check_is_pytorch_model() return self.model.info(detailed=detailed, verbose=verbose) def fuse(self): """ - Fuses Conv2d and BatchNorm2d layers in the model. + Fuses Conv2d and BatchNorm2d layers in the model for optimized inference. + + This method iterates through the model's modules and fuses consecutive Conv2d and BatchNorm2d layers + into a single layer. This fusion can significantly improve inference speed by reducing the number of + operations and memory accesses required during forward passes. - This method optimizes the model by fusing Conv2d and BatchNorm2d layers, which can improve inference speed. + The fusion process typically involves folding the BatchNorm2d parameters (mean, variance, weight, and + bias) into the preceding Conv2d layer's weights and biases. This results in a single Conv2d layer that + performs both convolution and normalization in one step. Raises: - AssertionError: If the model is not a PyTorch model. + TypeError: If the model is not a PyTorch nn.Module. + + Examples: + >>> model = Model("yolov8n.pt") + >>> model.fuse() + >>> # Model is now fused and ready for optimized inference """ self._check_is_pytorch_model() self.model.fuse() @@ -366,20 +475,26 @@ class Model(nn.Module): """ Generates image embeddings based on the provided source. - This method is a wrapper around the 'predict()' method, focusing on generating embeddings from an image source. - It allows customization of the embedding process through various keyword arguments. + This method is a wrapper around the 'predict()' method, focusing on generating embeddings from an image + source. It allows customization of the embedding process through various keyword arguments. Args: - source (str | int | PIL.Image | np.ndarray): The source of the image for generating embeddings. - The source can be a file path, URL, PIL image, numpy array, etc. Defaults to None. - stream (bool): If True, predictions are streamed. Defaults to False. - **kwargs (any): Additional keyword arguments for configuring the embedding process. + source (str | Path | int | List | Tuple | np.ndarray | torch.Tensor): The source of the image for + generating embeddings. Can be a file path, URL, PIL image, numpy array, etc. + stream (bool): If True, predictions are streamed. + **kwargs (Any): Additional keyword arguments for configuring the embedding process. Returns: (List[torch.Tensor]): A list containing the image embeddings. Raises: AssertionError: If the model is not a PyTorch model. 
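Returning to fuse() above: the Conv2d+BatchNorm2d folding it documents has a standard closed form. A hedged sketch of that math for the groups=1 case, not the library's implementation:

```python
# Standard Conv+BN folding math (groups=1 assumed); illustration only.
import torch
import torch.nn as nn


def fuse_conv_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:
    fused = nn.Conv2d(conv.in_channels, conv.out_channels, conv.kernel_size, conv.stride, conv.padding, bias=True)
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)  # gamma / sqrt(var + eps)
    fused.weight.data = conv.weight.data * scale.reshape(-1, 1, 1, 1)
    bias = conv.bias.data if conv.bias is not None else torch.zeros(conv.out_channels)
    fused.bias.data = (bias - bn.running_mean) * scale + bn.bias.data
    return fused
```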
+ + Examples: + >>> model = YOLO('yolov8n.pt') + >>> image = 'https://ultralytics.com/images/bus.jpg' + >>> embeddings = model.embed(image) + >>> print(embeddings[0].shape) """ if not kwargs.get("embed"): kwargs["embed"] = [len(self.model.model) - 2] # embed second-to-last layer if no indices passed @@ -397,28 +512,31 @@ class Model(nn.Module): This method facilitates the prediction process, allowing various configurations through keyword arguments. It supports predictions with custom predictors or the default predictor method. The method handles different - types of image sources and can operate in a streaming mode. It also provides support for SAM-type models - through 'prompts'. - - The method sets up a new predictor if not already present and updates its arguments with each call. - It also issues a warning and uses default assets if the 'source' is not provided. The method determines if it - is being called from the command line interface and adjusts its behavior accordingly, including setting defaults - for confidence threshold and saving behavior. + types of image sources and can operate in a streaming mode. Args: - source (str | int | PIL.Image | np.ndarray, optional): The source of the image for making predictions. - Accepts various types, including file paths, URLs, PIL images, and numpy arrays. Defaults to ASSETS. - stream (bool, optional): Treats the input source as a continuous stream for predictions. Defaults to False. - predictor (BasePredictor, optional): An instance of a custom predictor class for making predictions. - If None, the method uses a default predictor. Defaults to None. - **kwargs (any): Additional keyword arguments for configuring the prediction process. These arguments allow - for further customization of the prediction behavior. + source (str | Path | int | List[str] | List[Path] | List[int] | np.ndarray | torch.Tensor): The source + of the image(s) to make predictions on. Accepts various types including file paths, URLs, PIL + images, numpy arrays, and torch tensors. + stream (bool): If True, treats the input source as a continuous stream for predictions. + predictor (BasePredictor | None): An instance of a custom predictor class for making predictions. + If None, the method uses a default predictor. + **kwargs (Any): Additional keyword arguments for configuring the prediction process. Returns: - (List[ultralytics.engine.results.Results]): A list of prediction results, encapsulated in the Results class. - - Raises: - AttributeError: If the predictor is not properly set up. + (List[ultralytics.engine.results.Results]): A list of prediction results, each encapsulated in a + Results object. + + Examples: + >>> model = YOLO('yolov8n.pt') + >>> results = model.predict(source='path/to/image.jpg', conf=0.25) + >>> for r in results: + ... print(r.boxes.data) # print detection bounding boxes + + Notes: + - If 'source' is not provided, it defaults to the ASSETS constant with a warning. + - The method sets up a new predictor if not already present and updates its arguments with each call. + - For SAM-type models, 'prompts' can be passed as a keyword argument. """ if source is None: source = ASSETS @@ -453,26 +571,33 @@ class Model(nn.Module): """ Conducts object tracking on the specified input source using the registered trackers. - This method performs object tracking using the model's predictors and optionally registered trackers. It is - capable of handling different types of input sources such as file paths or video streams. 
The method supports - customization of the tracking process through various keyword arguments. It registers trackers if they are not - already present and optionally persists them based on the 'persist' flag. - - The method sets a default confidence threshold specifically for ByteTrack-based tracking, which requires low - confidence predictions as input. The tracking mode is explicitly set in the keyword arguments. + This method performs object tracking using the model's predictors and optionally registered trackers. It handles + various input sources such as file paths or video streams, and supports customization through keyword arguments. + The method registers trackers if not already present and can persist them between calls. Args: - source (str, optional): The input source for object tracking. It can be a file path, URL, or video stream. - stream (bool, optional): Treats the input source as a continuous video stream. Defaults to False. - persist (bool, optional): Persists the trackers between different calls to this method. Defaults to False. - **kwargs (any): Additional keyword arguments for configuring the tracking process. These arguments allow - for further customization of the tracking behavior. + source (Union[str, Path, int, List, Tuple, np.ndarray, torch.Tensor], optional): Input source for object + tracking. Can be a file path, URL, or video stream. + stream (bool): If True, treats the input source as a continuous video stream. Defaults to False. + persist (bool): If True, persists trackers between different calls to this method. Defaults to False. + **kwargs (Any): Additional keyword arguments for configuring the tracking process. Returns: - (List[ultralytics.engine.results.Results]): A list of tracking results, encapsulated in the Results class. + (List[ultralytics.engine.results.Results]): A list of tracking results, each encapsulated in a Results object. Raises: AttributeError: If the predictor does not have registered trackers. + + Examples: + >>> model = YOLO('yolov8n.pt') + >>> results = model.track(source='path/to/video.mp4', show=True) + >>> for r in results: + ... print(r.boxes.id) # print tracking IDs + + Notes: + - This method sets a default confidence threshold of 0.1 for ByteTrack-based tracking. + - The tracking mode is explicitly set in the keyword arguments. + - Batch size is set to 1 for tracking in videos. """ if not hasattr(self.predictor, "trackers"): from ultralytics.trackers import register_tracker @@ -491,26 +616,25 @@ class Model(nn.Module): """ Validates the model using a specified dataset and validation configuration. - This method facilitates the model validation process, allowing for a range of customization through various - settings and configurations. It supports validation with a custom validator or the default validation approach. - The method combines default configurations, method-specific defaults, and user-provided arguments to configure - the validation process. After validation, it updates the model's metrics with the results obtained from the - validator. - - The method supports various arguments that allow customization of the validation process. For a comprehensive - list of all configurable options, users should refer to the 'configuration' section in the documentation. + This method facilitates the model validation process, allowing for customization through various settings. It + supports validation with a custom validator or the default validation approach. 
The method combines default + configurations, method-specific defaults, and user-provided arguments to configure the validation process. Args: - validator (BaseValidator, optional): An instance of a custom validator class for validating the model. If - None, the method uses a default validator. Defaults to None. - **kwargs (any): Arbitrary keyword arguments representing the validation configuration. These arguments are - used to customize various aspects of the validation process. + validator (ultralytics.engine.validator.BaseValidator | None): An instance of a custom validator class for + validating the model. + **kwargs (Any): Arbitrary keyword arguments for customizing the validation process. Returns: (ultralytics.utils.metrics.DetMetrics): Validation metrics obtained from the validation process. Raises: AssertionError: If the model is not a PyTorch model. + + Examples: + >>> model = YOLO('yolov8n.pt') + >>> results = model.val(data='coco128.yaml', imgsz=640) + >>> print(results.box.map) # Print mAP50-95 """ custom = {"rect": True} # method defaults args = {**self.overrides, **custom, **kwargs, "mode": "val"} # highest priority args on the right @@ -528,23 +652,31 @@ class Model(nn.Module): Benchmarks the model across various export formats to evaluate performance. This method assesses the model's performance in different export formats, such as ONNX, TorchScript, etc. - It uses the 'benchmark' function from the ultralytics.utils.benchmarks module. The benchmarking is configured - using a combination of default configuration values, model-specific arguments, method-specific defaults, and - any additional user-provided keyword arguments. - - The method supports various arguments that allow customization of the benchmarking process, such as dataset - choice, image size, precision modes, device selection, and verbosity. For a comprehensive list of all - configurable options, users should refer to the 'configuration' section in the documentation. + It uses the 'benchmark' function from the ultralytics.utils.benchmarks module. The benchmarking is + configured using a combination of default configuration values, model-specific arguments, method-specific + defaults, and any additional user-provided keyword arguments. Args: - **kwargs (any): Arbitrary keyword arguments to customize the benchmarking process. These are combined with - default configurations, model-specific arguments, and method defaults. + **kwargs (Any): Arbitrary keyword arguments to customize the benchmarking process. These are combined with + default configurations, model-specific arguments, and method defaults. Common options include: + - data (str): Path to the dataset for benchmarking. + - imgsz (int | List[int]): Image size for benchmarking. + - half (bool): Whether to use half-precision (FP16) mode. + - int8 (bool): Whether to use int8 precision mode. + - device (str): Device to run the benchmark on (e.g., 'cpu', 'cuda'). + - verbose (bool): Whether to print detailed benchmark information. Returns: - (dict): A dictionary containing the results of the benchmarking process. + (Dict): A dictionary containing the results of the benchmarking process, including metrics for + different export formats. Raises: AssertionError: If the model is not a PyTorch model. 
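The val() body earlier in this hunk assembles its configuration with a rightmost-wins dict merge; a tiny demonstration of that priority order:

```python
# Rightmost keys win, so user kwargs override method defaults and model overrides.
overrides = {"imgsz": 640, "conf": 0.25}  # model overrides
custom = {"rect": True}                   # method defaults
kwargs = {"imgsz": 320}                   # user arguments
args = {**overrides, **custom, **kwargs, "mode": "val"}
print(args)  # {'imgsz': 320, 'conf': 0.25, 'rect': True, 'mode': 'val'}
```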
+ + Examples: + >>> model = YOLO('yolov8n.pt') + >>> results = model.benchmark(data='coco8.yaml', imgsz=640, half=True) + >>> print(results) """ self._check_is_pytorch_model() from ultralytics.utils.benchmarks import benchmark @@ -570,20 +702,31 @@ class Model(nn.Module): This method facilitates the export of the model to various formats (e.g., ONNX, TorchScript) for deployment purposes. It uses the 'Exporter' class for the export process, combining model-specific overrides, method - defaults, and any additional arguments provided. The combined arguments are used to configure export settings. - - The method supports a wide range of arguments to customize the export process. For a comprehensive list of all - possible arguments, refer to the 'configuration' section in the documentation. + defaults, and any additional arguments provided. Args: - **kwargs (any): Arbitrary keyword arguments to customize the export process. These are combined with the - model's overrides and method defaults. + **kwargs (Dict): Arbitrary keyword arguments to customize the export process. These are combined with + the model's overrides and method defaults. Common arguments include: + format (str): Export format (e.g., 'onnx', 'engine', 'coreml'). + half (bool): Export model in half-precision. + int8 (bool): Export model in int8 precision. + device (str): Device to run the export on. + workspace (int): Maximum memory workspace size for TensorRT engines. + nms (bool): Add Non-Maximum Suppression (NMS) module to model. + simplify (bool): Simplify ONNX model. Returns: - (str): The exported model filename in the specified format, or an object related to the export process. + (str): The path to the exported model file. Raises: AssertionError: If the model is not a PyTorch model. + ValueError: If an unsupported export format is specified. + RuntimeError: If the export process fails due to errors. + + Examples: + >>> model = YOLO('yolov8n.pt') + >>> model.export(format='onnx', dynamic=True, simplify=True) + 'path/to/exported/model.onnx' """ self._check_is_pytorch_model() from .exporter import Exporter @@ -606,29 +749,38 @@ class Model(nn.Module): """ Trains the model using the specified dataset and training configuration. - This method facilitates model training with a range of customizable settings and configurations. It supports - training with a custom trainer or the default training approach defined in the method. The method handles - different scenarios, such as resuming training from a checkpoint, integrating with Ultralytics HUB, and - updating model and configuration after training. + This method facilitates model training with a range of customizable settings. It supports training with a + custom trainer or the default training approach. The method handles scenarios such as resuming training + from a checkpoint, integrating with Ultralytics HUB, and updating model and configuration after training. - When using Ultralytics HUB, if the session already has a loaded model, the method prioritizes HUB training - arguments and issues a warning if local arguments are provided. It checks for pip updates and combines default - configurations, method-specific defaults, and user-provided arguments to configure the training process. After - training, it updates the model and its configurations, and optionally attaches metrics. + When using Ultralytics HUB, if the session has a loaded model, the method prioritizes HUB training + arguments and warns if local arguments are provided. 
It checks for pip updates and combines default
+        configurations, method-specific defaults, and user-provided arguments to configure the training process.

         Args:
-            trainer (BaseTrainer, optional): An instance of a custom trainer class for training the model. If None, the
-                method uses a default trainer. Defaults to None.
-            **kwargs (any): Arbitrary keyword arguments representing the training configuration. These arguments are
-                used to customize various aspects of the training process.
+            trainer (BaseTrainer | None): Custom trainer instance for model training. If None, uses default.
+            **kwargs (Any): Arbitrary keyword arguments for training configuration. Common options include:
+                data (str): Path to dataset configuration file.
+                epochs (int): Number of training epochs.
+                batch (int): Batch size for training.
+                imgsz (int): Input image size.
+                device (str): Device to run training on (e.g., 'cuda', 'cpu').
+                workers (int): Number of worker threads for data loading.
+                optimizer (str): Optimizer to use for training.
+                lr0 (float): Initial learning rate.
+                patience (int): Epochs to wait for no observable improvement for early stopping of training.

         Returns:
-            (dict | None): Training metrics if available and training is successful; otherwise, None.
+            (Dict | None): Training metrics if available and training is successful; otherwise, None.

         Raises:
             AssertionError: If the model is not a PyTorch model.
             PermissionError: If there is a permission issue with the HUB session.
             ModuleNotFoundError: If the HUB SDK is not installed.
+
+        Examples:
+            >>> model = YOLO('yolov8n.pt')
+            >>> results = model.train(data='coco128.yaml', epochs=3)
         """
         self._check_is_pytorch_model()
         if hasattr(self.session, "model") and self.session.model.id:  # Ultralytics HUB session with loaded model
@@ -682,14 +834,19 @@ class Model(nn.Module):
         Args:
             use_ray (bool): If True, uses Ray Tune for hyperparameter tuning. Defaults to False.
             iterations (int): The number of tuning iterations to perform. Defaults to 10.
-            *args (list): Variable length argument list for additional arguments.
-            **kwargs (any): Arbitrary keyword arguments. These are combined with the model's overrides and defaults.
+            *args (List): Variable length argument list for additional arguments.
+            **kwargs (Dict): Arbitrary keyword arguments. These are combined with the model's overrides and defaults.

         Returns:
-            (dict): A dictionary containing the results of the hyperparameter search.
+            (Dict): A dictionary containing the results of the hyperparameter search.

         Raises:
             AssertionError: If the model is not a PyTorch model.
+
+        Examples:
+            >>> model = YOLO('yolov8n.pt')
+            >>> results = model.tune(use_ray=True, iterations=20)
+            >>> print(results)
         """
         self._check_is_pytorch_model()
         if use_ray:
@@ -704,7 +861,27 @@ class Model(nn.Module):
         return Tuner(args=args, _callbacks=self.callbacks)(model=self, iterations=iterations)

     def _apply(self, fn) -> "Model":
-        """Apply to(), cpu(), cuda(), half(), float() to model tensors that are not parameters or registered buffers."""
+        """
+        Applies a function to model tensors that are not parameters or registered buffers.
+
+        This method extends the functionality of the parent class's _apply method by additionally resetting the
+        predictor and updating the device in the model's overrides. It's typically used for operations like
+        moving the model to a different device or changing its precision.
+
+        Args:
+            fn (Callable): A function to be applied to the model's tensors. This is typically a method like
+                to(), cpu(), cuda(), half(), or float().
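The _apply override documented above follows a common pattern: delegate tensor movement to the parent class, then invalidate device-dependent caches. An illustrative standalone sketch of that pattern, not the ultralytics class:

```python
# Illustrative _apply override; mirrors the reset pattern shown in this hunk.
import torch.nn as nn


class DeviceAwareModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 2)
        self.predictor = object()        # stand-in for a cached predictor
        self.overrides = {"device": "cpu"}

    def _apply(self, fn):
        self = super()._apply(fn)        # applies fn to parameters and buffers
        self.predictor = None            # cache may reference the old device
        self.overrides["device"] = None  # recompute device lazily on next use
        return self
```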
+ + Returns: + (Model): The model instance with the function applied and updated attributes. + + Raises: + AssertionError: If the model is not a PyTorch model. + + Examples: + >>> model = Model("yolov8n.pt") + >>> model = model._apply(lambda t: t.cuda()) # Move model to GPU + """ self._check_is_pytorch_model() self = super()._apply(fn) # noqa self.predictor = None # reset predictor as device may have changed @@ -717,10 +894,19 @@ class Model(nn.Module): Retrieves the class names associated with the loaded model. This property returns the class names if they are defined in the model. It checks the class names for validity - using the 'check_class_names' function from the ultralytics.nn.autobackend module. + using the 'check_class_names' function from the ultralytics.nn.autobackend module. If the predictor is not + initialized, it sets it up before retrieving the names. Returns: - (list | None): The class names of the model if available, otherwise None. + (List[str]): A list of class names associated with the model. + + Raises: + AttributeError: If the model or predictor does not have a 'names' attribute. + + Examples: + >>> model = YOLO('yolov8n.pt') + >>> print(model.names) + ['person', 'bicycle', 'car', ...] """ from ultralytics.nn.autobackend import check_class_names @@ -736,11 +922,22 @@ class Model(nn.Module): """ Retrieves the device on which the model's parameters are allocated. - This property is used to determine whether the model's parameters are on CPU or GPU. It only applies to models - that are instances of nn.Module. + This property determines the device (CPU or GPU) where the model's parameters are currently stored. It is + applicable only to models that are instances of nn.Module. Returns: - (torch.device | None): The device (CPU/GPU) of the model if it is a PyTorch model, otherwise None. + (torch.device): The device (CPU/GPU) of the model. + + Raises: + AttributeError: If the model is not a PyTorch nn.Module instance. + + Examples: + >>> model = YOLO("yolov8n.pt") + >>> print(model.device) + device(type='cuda', index=0) # if CUDA is available + >>> model = model.to("cpu") + >>> print(model.device) + device(type='cpu') """ return next(self.model.parameters()).device if isinstance(self.model, nn.Module) else None @@ -749,10 +946,20 @@ class Model(nn.Module): """ Retrieves the transformations applied to the input data of the loaded model. - This property returns the transformations if they are defined in the model. + This property returns the transformations if they are defined in the model. The transforms + typically include preprocessing steps like resizing, normalization, and data augmentation + that are applied to input data before it is fed into the model. Returns: (object | None): The transform object of the model if available, otherwise None. + + Examples: + >>> model = YOLO('yolov8n.pt') + >>> transforms = model.transforms + >>> if transforms: + ... print(f"Model transforms: {transforms}") + ... else: + ... print("No transforms defined for this model.") """ return self.model.transforms if hasattr(self.model, "transforms") else None @@ -760,15 +967,25 @@ class Model(nn.Module): """ Adds a callback function for a specified event. - This method allows the user to register a custom callback function that is triggered on a specific event during - model training or inference. + This method allows registering custom callback functions that are triggered on specific events during + model operations such as training or inference. 
Callbacks provide a way to extend and customize the + behavior of the model at various stages of its lifecycle. Args: - event (str): The name of the event to attach the callback to. - func (callable): The callback function to be registered. + event (str): The name of the event to attach the callback to. Must be a valid event name recognized + by the Ultralytics framework. + func (Callable): The callback function to be registered. This function will be called when the + specified event occurs. Raises: - ValueError: If the event name is not recognized. + ValueError: If the event name is not recognized or is invalid. + + Examples: + >>> def on_train_start(trainer): + ... print("Training is starting!") + >>> model = YOLO('yolov8n.pt') + >>> model.add_callback("on_train_start", on_train_start) + >>> model.train(data='coco128.yaml', epochs=1) """ self.callbacks[event].append(func) @@ -777,12 +994,26 @@ class Model(nn.Module): Clears all callback functions registered for a specified event. This method removes all custom and default callback functions associated with the given event. + It resets the callback list for the specified event to an empty list, effectively removing all + registered callbacks for that event. Args: - event (str): The name of the event for which to clear the callbacks. - - Raises: - ValueError: If the event name is not recognized. + event (str): The name of the event for which to clear the callbacks. This should be a valid event name + recognized by the Ultralytics callback system. + + Examples: + >>> model = YOLO('yolov8n.pt') + >>> model.add_callback('on_train_start', lambda: print('Training started')) + >>> model.clear_callback('on_train_start') + >>> # All callbacks for 'on_train_start' are now removed + + Notes: + - This method affects both custom callbacks added by the user and default callbacks + provided by the Ultralytics framework. + - After calling this method, no callbacks will be executed for the specified event + until new ones are added. + - Use with caution as it removes all callbacks, including essential ones that might + be required for proper functioning of certain operations. """ self.callbacks[event] = [] @@ -791,14 +1022,45 @@ class Model(nn.Module): Resets all callbacks to their default functions. This method reinstates the default callback functions for all events, removing any custom callbacks that were - added previously. + previously added. It iterates through all default callback events and replaces the current callbacks with the + default ones. + + The default callbacks are defined in the 'callbacks.default_callbacks' dictionary, which contains predefined + functions for various events in the model's lifecycle, such as on_train_start, on_epoch_end, etc. + + This method is useful when you want to revert to the original set of callbacks after making custom modifications, + ensuring consistent behavior across different runs or experiments. + + Examples: + >>> model = YOLO('yolov8n.pt') + >>> model.add_callback('on_train_start', custom_function) + >>> model.reset_callbacks() + # All callbacks are now reset to their default functions """ for event in callbacks.default_callbacks.keys(): self.callbacks[event] = [callbacks.default_callbacks[event][0]] @staticmethod def _reset_ckpt_args(args: dict) -> dict: - """Reset arguments when loading a PyTorch model.""" + """ + Resets specific arguments when loading a PyTorch model checkpoint. 
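The add/clear/reset semantics documented above reduce to list operations on an event-keyed dict; a compact standalone model with illustrative event names:

```python
# Compact model of the callback registry semantics described above.
default_callbacks = {"on_train_start": [lambda trainer: print("default hook")]}
registry = {k: list(v) for k, v in default_callbacks.items()}

registry["on_train_start"].append(lambda trainer: print("custom hook"))  # add_callback
registry["on_train_start"] = []                                          # clear_callback
for event in default_callbacks:                                          # reset_callbacks
    registry[event] = [default_callbacks[event][0]]
```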
+ + This static method filters the input arguments dictionary to retain only a specific set of keys that are + considered important for model loading. It's used to ensure that only relevant arguments are preserved + when loading a model from a checkpoint, discarding any unnecessary or potentially conflicting settings. + + Args: + args (dict): A dictionary containing various model arguments and settings. + + Returns: + (dict): A new dictionary containing only the specified include keys from the input arguments. + + Examples: + >>> original_args = {'imgsz': 640, 'data': 'coco.yaml', 'task': 'detect', 'batch': 16, 'epochs': 100} + >>> reset_args = Model._reset_ckpt_args(original_args) + >>> print(reset_args) + {'imgsz': 640, 'data': 'coco.yaml', 'task': 'detect'} + """ include = {"imgsz", "data", "task", "single_cls"} # only remember these arguments when loading a PyTorch model return {k: v for k, v in args.items() if k in include} @@ -808,7 +1070,31 @@ class Model(nn.Module): # raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}") def _smart_load(self, key: str): - """Load model/trainer/validator/predictor.""" + """ + Loads the appropriate module based on the model task. + + This method dynamically selects and returns the correct module (model, trainer, validator, or predictor) + based on the current task of the model and the provided key. It uses the task_map attribute to determine + the correct module to load. + + Args: + key (str): The type of module to load. Must be one of 'model', 'trainer', 'validator', or 'predictor'. + + Returns: + (object): The loaded module corresponding to the specified key and current task. + + Raises: + NotImplementedError: If the specified key is not supported for the current task. + + Examples: + >>> model = Model(task='detect') + >>> predictor = model._smart_load('predictor') + >>> trainer = model._smart_load('trainer') + + Notes: + - This method is typically used internally by other methods of the Model class. + - The task_map attribute should be properly initialized with the correct mappings for each task. + """ try: return self.task_map[self.task][key] except Exception as e: @@ -821,9 +1107,30 @@ class Model(nn.Module): @property def task_map(self) -> dict: """ - Map head to model, trainer, validator, and predictor classes. + Provides a mapping from model tasks to corresponding classes for different modes. + + This property method returns a dictionary that maps each supported task (e.g., detect, segment, classify) + to a nested dictionary. The nested dictionary contains mappings for different operational modes + (model, trainer, validator, predictor) to their respective class implementations. + + The mapping allows for dynamic loading of appropriate classes based on the model's task and the + desired operational mode. This facilitates a flexible and extensible architecture for handling + various tasks and modes within the Ultralytics framework. Returns: - task_map (dict): The map of model task to mode classes. + (Dict[str, Dict[str, Any]]): A dictionary where keys are task names (str) and values are + nested dictionaries. Each nested dictionary has keys 'model', 'trainer', 'validator', and + 'predictor', mapping to their respective class implementations. 
+ + Example: + >>> model = Model() + >>> task_map = model.task_map + >>> detect_class_map = task_map['detect'] + >>> segment_class_map = task_map['segment'] + + Note: + The actual implementation of this method may vary depending on the specific tasks and + classes supported by the Ultralytics framework. The docstring provides a general + description of the expected behavior and structure. """ raise NotImplementedError("Please provide task map for your model!") diff --git a/ultralytics/engine/results.py b/ultralytics/engine/results.py index b4cc21fbbd..ced02f0ab0 100644 --- a/ultralytics/engine/results.py +++ b/ultralytics/engine/results.py @@ -19,7 +19,28 @@ from ultralytics.utils.torch_utils import smart_inference_mode class BaseTensor(SimpleClass): - """Base tensor class with additional methods for easy manipulation and device handling.""" + """ + Base tensor class with additional methods for easy manipulation and device handling. + + Attributes: + data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints. + orig_shape (Tuple[int, int]): Original shape of the image, typically in the format (height, width). + + Methods: + cpu: Return a copy of the tensor stored in CPU memory. + numpy: Returns a copy of the tensor as a numpy array. + cuda: Moves the tensor to GPU memory, returning a new instance if necessary. + to: Return a copy of the tensor with the specified device and dtype. + + Examples: + >>> import torch + >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]) + >>> orig_shape = (720, 1280) + >>> base_tensor = BaseTensor(data, orig_shape) + >>> cpu_tensor = base_tensor.cpu() + >>> numpy_array = base_tensor.numpy() + >>> gpu_tensor = base_tensor.cuda() + """ def __init__(self, data, orig_shape) -> None: """ @@ -27,20 +48,13 @@ class BaseTensor(SimpleClass): Args: data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints. - orig_shape (tuple): Original shape of the image, typically in the format (height, width). - - Returns: - (None) - - Example: - ```python - import torch - from ultralytics.engine.results import BaseTensor + orig_shape (Tuple[int, int]): Original shape of the image in (height, width) format. - data = torch.tensor([[1, 2, 3], [4, 5, 6]]) - orig_shape = (720, 1280) - base_tensor = BaseTensor(data, orig_shape) - ``` + Examples: + >>> import torch + >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]) + >>> orig_shape = (720, 1280) + >>> base_tensor = BaseTensor(data, orig_shape) """ assert isinstance(data, (torch.Tensor, np.ndarray)), "data must be torch.Tensor or np.ndarray" self.data = data @@ -48,31 +62,124 @@ class BaseTensor(SimpleClass): @property def shape(self): - """Returns the shape of the underlying data tensor for easier manipulation and device handling.""" + """ + Returns the shape of the underlying data tensor. + + Returns: + (Tuple[int, ...]): The shape of the data tensor. + + Examples: + >>> data = torch.rand(100, 4) + >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280)) + >>> print(base_tensor.shape) + (100, 4) + """ return self.data.shape def cpu(self): - """Return a copy of the tensor stored in CPU memory.""" + """ + Returns a copy of the tensor stored in CPU memory. + + Returns: + (BaseTensor): A new BaseTensor object with the data tensor moved to CPU memory. 
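+
+        Notes:
+            If the underlying data is already a `np.ndarray`, the same object is returned unchanged, since
+            numpy arrays always reside in CPU memory.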
+
+        Examples:
+            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]).cuda()
+            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
+            >>> cpu_tensor = base_tensor.cpu()
+            >>> isinstance(cpu_tensor, BaseTensor)
+            True
+            >>> cpu_tensor.data.device
+            device(type='cpu')
+        """
         return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)
 
     def numpy(self):
-        """Returns a copy of the tensor as a numpy array for efficient numerical operations."""
+        """
+        Returns a copy of the tensor as a numpy array.
+
+        Returns:
+            (np.ndarray): A numpy array containing the same data as the original tensor.
+
+        Examples:
+            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
+            >>> orig_shape = (720, 1280)
+            >>> base_tensor = BaseTensor(data, orig_shape)
+            >>> numpy_array = base_tensor.numpy()
+            >>> print(type(numpy_array))
+            <class 'numpy.ndarray'>
+        """
         return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)
 
     def cuda(self):
-        """Moves the tensor to GPU memory, returning a new instance if necessary."""
+        """
+        Moves the tensor to GPU memory.
+
+        Returns:
+            (BaseTensor): A new BaseTensor instance with the data tensor moved to GPU memory. If the data is a
+                numpy array, it is first converted to a torch tensor.
+
+        Examples:
+            >>> import torch
+            >>> from ultralytics.engine.results import BaseTensor
+            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
+            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
+            >>> gpu_tensor = base_tensor.cuda()
+            >>> print(gpu_tensor.data.device)
+            cuda:0
+        """
         return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape)
 
     def to(self, *args, **kwargs):
-        """Return a copy of the tensor with the specified device and dtype."""
+        """
+        Return a copy of the tensor with the specified device and dtype.
+
+        Args:
+            *args (Any): Variable length argument list to be passed to torch.Tensor.to().
+            **kwargs (Any): Arbitrary keyword arguments to be passed to torch.Tensor.to().
+
+        Returns:
+            (BaseTensor): A new BaseTensor instance with the data moved to the specified device and/or dtype.
+
+        Examples:
+            >>> base_tensor = BaseTensor(torch.randn(3, 4), orig_shape=(480, 640))
+            >>> cuda_tensor = base_tensor.to('cuda')
+            >>> float16_tensor = base_tensor.to(dtype=torch.float16)
+        """
         return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)
 
     def __len__(self):  # override len(results)
-        """Return the length of the underlying data tensor."""
+        """
+        Returns the length of the underlying data tensor.
+
+        Returns:
+            (int): The number of elements in the first dimension of the data tensor.
+
+        Examples:
+            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
+            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
+            >>> len(base_tensor)
+            2
+        """
         return len(self.data)
 
     def __getitem__(self, idx):
-        """Return a new BaseTensor instance containing the specified indexed elements of the data tensor."""
+        """
+        Returns a new BaseTensor instance containing the specified indexed elements of the data tensor.
+
+        Args:
+            idx (int | List[int] | torch.Tensor): Index or indices to select from the data tensor.
+
+        Returns:
+            (BaseTensor): A new BaseTensor instance containing the indexed data.
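+
+        Notes:
+            Indexing delegates directly to the underlying tensor, so boolean masks also work. A small sketch
+            (with hypothetical values) of filtering rows by their first column:
+
+            >>> mask = base_tensor.data[:, 0] > 3
+            >>> filtered = base_tensor[mask]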
+ + Examples: + >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]) + >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280)) + >>> result = base_tensor[0] # Select the first row + >>> print(result.data) + tensor([1, 2, 3]) + """ return self.__class__(self.data[idx], self.orig_shape) @@ -80,31 +187,43 @@ class Results(SimpleClass): """ A class for storing and manipulating inference results. + This class encapsulates the functionality for handling detection, segmentation, pose estimation, + and classification results from YOLO models. + Attributes: orig_img (numpy.ndarray): Original image as a numpy array. - orig_shape (tuple): Original image shape in (height, width) format. - boxes (Boxes, optional): Object containing detection bounding boxes. - masks (Masks, optional): Object containing detection masks. - probs (Probs, optional): Object containing class probabilities for classification tasks. - keypoints (Keypoints, optional): Object containing detected keypoints for each object. - speed (dict): Dictionary of preprocess, inference, and postprocess speeds (ms/image). - names (dict): Dictionary of class names. + orig_shape (Tuple[int, int]): Original image shape in (height, width) format. + boxes (Boxes | None): Object containing detection bounding boxes. + masks (Masks | None): Object containing detection masks. + probs (Probs | None): Object containing class probabilities for classification tasks. + keypoints (Keypoints | None): Object containing detected keypoints for each object. + obb (OBB | None): Object containing oriented bounding boxes. + speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds. + names (Dict[int, str]): Dictionary mapping class IDs to class names. path (str): Path to the image file. + _keys (Tuple[str, ...]): Tuple of attribute names for internal use. Methods: - update(boxes=None, masks=None, probs=None, obb=None): Updates object attributes with new detection results. - cpu(): Returns a copy of the Results object with all tensors on CPU memory. - numpy(): Returns a copy of the Results object with all tensors as numpy arrays. - cuda(): Returns a copy of the Results object with all tensors on GPU memory. - to(*args, **kwargs): Returns a copy of the Results object with tensors on a specified device and dtype. - new(): Returns a new Results object with the same image, path, and names. - plot(...): Plots detection results on an input image, returning an annotated image. - show(): Show annotated results to screen. - save(filename): Save annotated results to file. - verbose(): Returns a log string for each task, detailing detections and classifications. - save_txt(txt_file, save_conf=False): Saves detection results to a text file. - save_crop(save_dir, file_name=Path("im.jpg")): Saves cropped detection images. - tojson(normalize=False): Converts detection results to JSON format. + update: Updates object attributes with new detection results. + cpu: Returns a copy of the Results object with all tensors on CPU memory. + numpy: Returns a copy of the Results object with all tensors as numpy arrays. + cuda: Returns a copy of the Results object with all tensors on GPU memory. + to: Returns a copy of the Results object with tensors on a specified device and dtype. + new: Returns a new Results object with the same image, path, and names. + plot: Plots detection results on an input image, returning an annotated image. + show: Shows annotated results on screen. + save: Saves annotated results to file. 
+ verbose: Returns a log string for each task, detailing detections and classifications. + save_txt: Saves detection results to a text file. + save_crop: Saves cropped detection images. + tojson: Converts detection results to JSON format. + + Examples: + >>> results = model("path/to/image.jpg") + >>> for result in results: + ... print(result.boxes) # Print detection boxes + ... result.show() # Display the annotated image + ... result.save(filename='result.jpg') # Save annotated image """ def __init__( @@ -116,26 +235,26 @@ class Results(SimpleClass): Args: orig_img (numpy.ndarray): The original image as a numpy array. path (str): The path to the image file. - names (dict): A dictionary of class names. - boxes (torch.tensor, optional): A 2D tensor of bounding box coordinates for each detection. - masks (torch.tensor, optional): A 3D tensor of detection masks, where each mask is a binary image. - probs (torch.tensor, optional): A 1D tensor of probabilities of each class for classification task. - keypoints (torch.tensor, optional): A 2D tensor of keypoint coordinates for each detection. For default pose - model, Keypoint indices for human body pose estimation are: - 0: Nose, 1: Left Eye, 2: Right Eye, 3: Left Ear, 4: Right Ear - 5: Left Shoulder, 6: Right Shoulder, 7: Left Elbow, 8: Right Elbow - 9: Left Wrist, 10: Right Wrist, 11: Left Hip, 12: Right Hip - 13: Left Knee, 14: Right Knee, 15: Left Ankle, 16: Right Ankle - obb (torch.tensor, optional): A 2D tensor of oriented bounding box coordinates for each detection. - speed (dict, optional): A dictionary containing preprocess, inference, and postprocess speeds (ms/image). - - Returns: - None - - Example: - ```python - results = model("path/to/image.jpg") - ``` + names (Dict): A dictionary of class names. + boxes (torch.Tensor | None): A 2D tensor of bounding box coordinates for each detection. + masks (torch.Tensor | None): A 3D tensor of detection masks, where each mask is a binary image. + probs (torch.Tensor | None): A 1D tensor of probabilities of each class for classification task. + keypoints (torch.Tensor | None): A 2D tensor of keypoint coordinates for each detection. + obb (torch.Tensor | None): A 2D tensor of oriented bounding box coordinates for each detection. + speed (Dict | None): A dictionary containing preprocess, inference, and postprocess speeds (ms/image). + + Examples: + >>> results = model("path/to/image.jpg") + >>> result = results[0] # Get the first result + >>> boxes = result.boxes # Get the boxes for the first result + >>> masks = result.masks # Get the masks for the first result + + Notes: + For the default pose model, keypoint indices for human body pose estimation are: + 0: Nose, 1: Left Eye, 2: Right Eye, 3: Left Ear, 4: Right Ear + 5: Left Shoulder, 6: Right Shoulder, 7: Left Elbow, 8: Right Elbow + 9: Left Wrist, 10: Right Wrist, 11: Left Hip, 12: Right Hip + 13: Left Knee, 14: Right Knee, 15: Left Ankle, 16: Right Ankle """ self.orig_img = orig_img self.orig_shape = orig_img.shape[:2] @@ -151,18 +270,59 @@ class Results(SimpleClass): self._keys = "boxes", "masks", "probs", "keypoints", "obb" def __getitem__(self, idx): - """Return a Results object for a specific index of inference results.""" + """ + Return a Results object for a specific index of inference results. + + Args: + idx (int | slice): Index or slice to retrieve from the Results object. + + Returns: + (Results): A new Results object containing the specified subset of inference results. 
+ + Examples: + >>> results = model('path/to/image.jpg') # Perform inference + >>> single_result = results[0] # Get the first result + >>> subset_results = results[1:4] # Get a slice of results + """ return self._apply("__getitem__", idx) def __len__(self): - """Return the number of detections in the Results object from a non-empty attribute set (boxes, masks, etc.).""" + """ + Return the number of detections in the Results object. + + Returns: + (int): The number of detections, determined by the length of the first non-empty attribute + (boxes, masks, probs, keypoints, or obb). + + Examples: + >>> results = Results(orig_img, path, names, boxes=torch.rand(5, 4)) + >>> len(results) + 5 + """ for k in self._keys: v = getattr(self, k) if v is not None: return len(v) def update(self, boxes=None, masks=None, probs=None, obb=None): - """Updates detection results attributes including boxes, masks, probs, and obb with new data.""" + """ + Updates the Results object with new detection data. + + This method allows updating the boxes, masks, probabilities, and oriented bounding boxes (OBB) of the + Results object. It ensures that boxes are clipped to the original image shape. + + Args: + boxes (torch.Tensor | None): A tensor of shape (N, 6) containing bounding box coordinates and + confidence scores. The format is (x1, y1, x2, y2, conf, class). + masks (torch.Tensor | None): A tensor of shape (N, H, W) containing segmentation masks. + probs (torch.Tensor | None): A tensor of shape (num_classes,) containing class probabilities. + obb (torch.Tensor | None): A tensor of shape (N, 5) containing oriented bounding box coordinates. + + Examples: + >>> results = model('image.jpg') + >>> new_boxes = torch.tensor([[100, 100, 200, 200, 0.9, 0]]) + >>> results[0].update(boxes=new_boxes) + """ if boxes is not None: self.boxes = Boxes(ops.clip_boxes(boxes, self.orig_shape), self.orig_shape) if masks is not None: @@ -174,24 +334,23 @@ class Results(SimpleClass): def _apply(self, fn, *args, **kwargs): """ - Applies a function to all non-empty attributes and returns a new Results object with modified attributes. This - function is internally called by methods like .to(), .cuda(), .cpu(), etc. + Applies a function to all non-empty attributes and returns a new Results object with modified attributes. + + This method is internally called by methods like .to(), .cuda(), .cpu(), etc. Args: fn (str): The name of the function to apply. - *args: Variable length argument list to pass to the function. - **kwargs: Arbitrary keyword arguments to pass to the function. + *args (Any): Variable length argument list to pass to the function. + **kwargs (Any): Arbitrary keyword arguments to pass to the function. Returns: (Results): A new Results object with attributes modified by the applied function. - Example: - ```python - results = model("path/to/image.jpg") - for result in results: - result_cuda = result.cuda() - result_cpu = result.cpu() - ``` + Examples: + >>> results = model("path/to/image.jpg") + >>> for result in results: + ... result_cuda = result.cuda() + ... result_cpu = result.cpu() """ r = self.new() for k in self._keys: @@ -201,23 +360,86 @@ class Results(SimpleClass): return r def cpu(self): - """Returns a copy of the Results object with all its tensors moved to CPU memory.""" + """ + Returns a copy of the Results object with all its tensors moved to CPU memory. + + This method creates a new Results object with all tensor attributes (boxes, masks, probs, keypoints, obb) + transferred to CPU memory. 
It's useful for moving data from GPU to CPU for further processing or saving.
+
+        Returns:
+            (Results): A new Results object with all tensor attributes on CPU memory.
+
+        Examples:
+            >>> results = model('path/to/image.jpg')  # Perform inference
+            >>> cpu_result = results[0].cpu()  # Move the first result to CPU
+            >>> print(cpu_result.boxes.data.device)  # Output: cpu
+        """
         return self._apply("cpu")
 
     def numpy(self):
-        """Returns a copy of the Results object with all tensors as numpy arrays."""
+        """
+        Converts all tensors in the Results object to numpy arrays.
+
+        Returns:
+            (Results): A new Results object with all tensors converted to numpy arrays.
+
+        Examples:
+            >>> results = model('path/to/image.jpg')
+            >>> numpy_result = results[0].numpy()
+            >>> type(numpy_result.boxes.data)
+            <class 'numpy.ndarray'>
+
+        Notes:
+            This method creates a new Results object, leaving the original unchanged. It's useful for
+            interoperability with numpy-based libraries or when CPU-based operations are required.
+        """
         return self._apply("numpy")
 
     def cuda(self):
-        """Moves all tensors in the Results object to GPU memory."""
+        """
+        Moves all tensors in the Results object to GPU memory.
+
+        Returns:
+            (Results): A new Results object with all tensors moved to CUDA device.
+
+        Examples:
+            >>> results = model("path/to/image.jpg")
+            >>> cuda_results = results[0].cuda()  # Move first result to GPU
+            >>> for result in results:
+            ...     result_cuda = result.cuda()  # Move each result to GPU
+        """
         return self._apply("cuda")
 
     def to(self, *args, **kwargs):
-        """Moves all tensors in the Results object to the specified device and dtype."""
+        """
+        Moves all tensors in the Results object to the specified device and dtype.
+
+        Args:
+            *args (Any): Variable length argument list to be passed to torch.Tensor.to().
+            **kwargs (Any): Arbitrary keyword arguments to be passed to torch.Tensor.to().
+
+        Returns:
+            (Results): A new Results object with all tensors moved to the specified device and dtype.
+
+        Examples:
+            >>> results = model("path/to/image.jpg")
+            >>> result_cuda = results[0].to("cuda")  # Move first result to GPU
+            >>> result_cpu = results[0].to("cpu")  # Move first result to CPU
+            >>> result_half = results[0].to(dtype=torch.float16)  # Convert first result to half precision
+        """
         return self._apply("to", *args, **kwargs)
 
     def new(self):
-        """Returns a new Results object with the same image, path, names, and speed attributes."""
+        """
+        Creates a new Results object with the same image, path, names, and speed attributes.
+
+        Returns:
+            (Results): A new Results object with copied attributes from the original instance.
+
+        Examples:
+            >>> results = model("path/to/image.jpg")
+            >>> new_result = results[0].new()
+        """
         return Results(orig_img=self.orig_img, path=self.path, names=self.names, speed=self.speed)
 
     def plot(
         self,
         conf=True,
         line_width=None,
         font_size=None,
         font="Arial.ttf",
         pil=False,
         img=None,
         im_gpu=None,
         kpt_radius=5,
         kpt_line=True,
         labels=True,
         boxes=True,
         masks=True,
         probs=True,
         show=False,
         save=False,
         filename=None,
     ):
         """
-        Plots the detection results on an input RGB image. Accepts a numpy array (cv2) or a PIL Image.
+        Plots detection results on an input RGB image.
 
         Args:
-            conf (bool): Whether to plot the detection confidence score.
-            line_width (float, optional): The line width of the bounding boxes. If None, it is scaled to the image size.
-            font_size (float, optional): The font size of the text. If None, it is scaled to the image size.
-            font (str): The font to use for the text.
+            conf (bool): Whether to plot detection confidence scores.
+            line_width (float | None): Line width of bounding boxes. If None, scaled to image size.
+            font_size (float | None): Font size for text. If None, scaled to image size.
+            font (str): Font to use for text.
             pil (bool): Whether to return the image as a PIL Image.
-            img (numpy.ndarray): Plot to another image. if not, plot to original image.
-            im_gpu (torch.Tensor): Normalized image in gpu with shape (1, 3, 640, 640), for faster mask plotting.
-            kpt_radius (int, optional): Radius of the drawn keypoints. Default is 5.
+            img (np.ndarray | None): Image to plot on. If None, uses original image.
+            im_gpu (torch.Tensor | None): Normalized image on GPU for faster mask plotting.
+            kpt_radius (int): Radius of drawn keypoints.
             kpt_line (bool): Whether to draw lines connecting keypoints.
-            labels (bool): Whether to plot the label of bounding boxes.
-            boxes (bool): Whether to plot the bounding boxes.
-            masks (bool): Whether to plot the masks.
-            probs (bool): Whether to plot classification probability.
-            show (bool): Whether to display the annotated image directly.
-            save (bool): Whether to save the annotated image to `filename`.
-            filename (str): Filename to save image to if save is True.
-
-        Returns:
-            (numpy.ndarray): A numpy array of the annotated image.
-
-        Example:
-            ```python
-            from PIL import Image
-            from ultralytics import YOLO
-
-            model = YOLO('yolov8n.pt')
-            results = model('bus.jpg')  # results list
-            for r in results:
-                im_array = r.plot()  # plot a BGR numpy array of predictions
-                im = Image.fromarray(im_array[..., ::-1])  # RGB PIL image
-                im.show()  # show image
-                im.save('results.jpg')  # save image
-            ```
+            labels (bool): Whether to plot labels of bounding boxes.
+            boxes (bool): Whether to plot bounding boxes.
+            masks (bool): Whether to plot masks.
+            probs (bool): Whether to plot classification probabilities.
+            show (bool): Whether to display the annotated image.
+            save (bool): Whether to save the annotated image.
+            filename (str | None): Filename to save image if save is True.
+
+        Returns:
+            (np.ndarray): Annotated image as a BGR numpy array.
+
+        Examples:
+            >>> from PIL import Image
+            >>> results = model('image.jpg')
+            >>> for result in results:
+            ...     im = result.plot()  # BGR numpy array of predictions
+            ...     Image.fromarray(im[..., ::-1]).show()  # display RGB PIL image
         """
         if img is None and isinstance(self.orig_img, torch.Tensor):
             img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy()
@@ -339,18 +553,73 @@
         return annotator.result()
 
     def show(self, *args, **kwargs):
-        """Show the image with annotated inference results."""
+        """
+        Display the image with annotated inference results.
+
+        This method plots the detection results on the original image and displays it. It's a convenient way to
+        visualize the model's predictions directly.
+
+        Args:
+            *args (Any): Variable length argument list to be passed to the `plot()` method.
+            **kwargs (Any): Arbitrary keyword arguments to be passed to the `plot()` method.
+
+        Examples:
+            >>> results = model('path/to/image.jpg')
+            >>> results[0].show()  # Display the first result
+            >>> for result in results:
+            ...     result.show()  # Display all results
+        """
         self.plot(show=True, *args, **kwargs)
 
     def save(self, filename=None, *args, **kwargs):
-        """Save annotated inference results image to file."""
+        """
+        Saves annotated inference results image to file.
+
+        This method plots the detection results on the original image and saves the annotated image to a file. It
+        utilizes the `plot` method to generate the annotated image and then saves it to the specified filename.
+
+        Args:
+            filename (str | Path | None): The filename to save the annotated image. If None, a default filename
+                is generated based on the original image path.
+ *args (Any): Variable length argument list to be passed to the `plot` method. + **kwargs (Any): Arbitrary keyword arguments to be passed to the `plot` method. + + Examples: + >>> results = model('path/to/image.jpg') + >>> for result in results: + ... result.save('annotated_image.jpg') + >>> # Or with custom plot arguments + >>> for result in results: + ... result.save('annotated_image.jpg', conf=False, line_width=2) + """ if not filename: filename = f"results_{Path(self.path).name}" self.plot(save=True, filename=filename, *args, **kwargs) return filename def verbose(self): - """Returns a log string for each task in the results, detailing detection and classification outcomes.""" + """ + Returns a log string for each task in the results, detailing detection and classification outcomes. + + This method generates a human-readable string summarizing the detection and classification results. It includes + the number of detections for each class and the top probabilities for classification tasks. + + Returns: + (str): A formatted string containing a summary of the results. For detection tasks, it includes the + number of detections per class. For classification tasks, it includes the top 5 class probabilities. + + Examples: + >>> results = model('path/to/image.jpg') + >>> for result in results: + ... print(result.verbose()) + 2 persons, 1 car, 3 traffic lights, + dog 0.92, cat 0.78, horse 0.64, + + Notes: + - If there are no detections, the method returns "(no detections), " for detection tasks. + - For classification tasks, it returns the top 5 class probabilities and their corresponding class names. + - The returned string is comma-separated and ends with a comma and a space. + """ log_string = "" probs = self.probs boxes = self.boxes @@ -369,31 +638,26 @@ class Results(SimpleClass): Save detection results to a text file. Args: - txt_file (str): Path to the output text file. + txt_file (str | Path): Path to the output text file. save_conf (bool): Whether to include confidence scores in the output. Returns: (str): Path to the saved text file. - Example: - ```python - from ultralytics import YOLO - - model = YOLO('yolov8n.pt') - results = model("path/to/image.jpg") - for result in results: - result.save_txt("output.txt") - ``` + Examples: + >>> from ultralytics import YOLO + >>> model = YOLO('yolov8n.pt') + >>> results = model("path/to/image.jpg") + >>> for result in results: + ... result.save_txt("output.txt") Notes: - The file will contain one line per detection or classification with the following structure: - - For detections: `class confidence x_center y_center width height` - - For classifications: `confidence class_name` - - For masks and keypoints, the specific formats will vary accordingly. - + - For detections: `class confidence x_center y_center width height` + - For classifications: `confidence class_name` + - For masks and keypoints, the specific formats will vary accordingly. - The function will create the output directory if it does not exist. - If save_conf is False, the confidence scores will be excluded from the output. - - Existing contents of the file will not be overwritten; new results will be appended. """ is_obb = self.obb is not None @@ -426,27 +690,25 @@ class Results(SimpleClass): def save_crop(self, save_dir, file_name=Path("im.jpg")): """ - Save cropped detection images to `save_dir/cls/file_name.jpg`. + Saves cropped detection images to specified directory. + + This method saves cropped images of detected objects to a specified directory. 
Each crop is saved in a + subdirectory named after the object's class, with the filename based on the input file_name. Args: - save_dir (str | pathlib.Path): Directory path where the cropped images should be saved. - file_name (str | pathlib.Path): Filename for the saved cropped image. + save_dir (str | Path): Directory path where cropped images will be saved. + file_name (str | Path): Base filename for the saved cropped images. Default is Path("im.jpg"). Notes: - This function does not support Classify or Oriented Bounding Box (OBB) tasks. It will warn and exit if - called for such tasks. - - Example: - ```python - from ultralytics import YOLO - - model = YOLO("yolov8n.pt") - results = model("path/to/image.jpg") - - # Save cropped images to the specified directory - for result in results: - result.save_crop(save_dir="path/to/save/crops", file_name="crop") - ``` + - This method does not support Classify or Oriented Bounding Box (OBB) tasks. + - Crops are saved as 'save_dir/class_name/file_name.jpg'. + - The method will create necessary subdirectories if they don't exist. + - Original image is copied before cropping to avoid modifying the original. + + Examples: + >>> results = model("path/to/image.jpg") + >>> for result in results: + ... result.save_crop(save_dir="path/to/crops", file_name="detection") """ if self.probs is not None: LOGGER.warning("WARNING ⚠️ Classify task do not support `save_crop`.") @@ -463,7 +725,28 @@ class Results(SimpleClass): ) def summary(self, normalize=False, decimals=5): - """Convert inference results to a summarized dictionary with optional normalization for box coordinates.""" + """ + Converts inference results to a summarized dictionary with optional normalization for box coordinates. + + This method creates a list of detection dictionaries, each containing information about a single + detection or classification result. For classification tasks, it returns the top class and its + confidence. For detection tasks, it includes class information, bounding box coordinates, and + optionally mask segments and keypoints. + + Args: + normalize (bool): Whether to normalize bounding box coordinates by image dimensions. Defaults to False. + decimals (int): Number of decimal places to round the output values to. Defaults to 5. + + Returns: + (List[Dict]): A list of dictionaries, each containing summarized information for a single + detection or classification result. The structure of each dictionary varies based on the + task type (classification or detection) and available information (boxes, masks, keypoints). + + Examples: + >>> results = model('image.jpg') + >>> summary = results[0].summary() + >>> print(summary) + """ # Create list of detection dictionaries results = [] if self.probs is not None: @@ -507,7 +790,34 @@ class Results(SimpleClass): return results def tojson(self, normalize=False, decimals=5): - """Converts detection results to JSON format.""" + """ + Converts detection results to JSON format. + + This method serializes the detection results into a JSON-compatible format. It includes information + about detected objects such as bounding boxes, class names, confidence scores, and optionally + segmentation masks and keypoints. + + Args: + normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions. + If True, coordinates will be returned as float values between 0 and 1. Defaults to False. + decimals (int): Number of decimal places to round the output values to. Defaults to 5. 
+ + Returns: + (str): A JSON string containing the serialized detection results. + + Examples: + >>> results = model("path/to/image.jpg") + >>> json_result = results[0].tojson() + >>> print(json_result) + + Notes: + - For classification tasks, the JSON will contain class probabilities instead of bounding boxes. + - For object detection tasks, the JSON will include bounding box coordinates, class names, and + confidence scores. + - If available, segmentation masks and keypoints will also be included in the JSON output. + - The method uses the `summary` method internally to generate the data structure before + converting it to JSON. + """ import json return json.dumps(self.summary(normalize=normalize, decimals=decimals), indent=2) @@ -515,43 +825,67 @@ class Results(SimpleClass): class Boxes(BaseTensor): """ - Manages detection boxes, providing easy access and manipulation of box coordinates, confidence scores, class - identifiers, and optional tracking IDs. Supports multiple formats for box coordinates, including both absolute and - normalized forms. + A class for managing and manipulating detection boxes. - Attributes: - data (torch.Tensor): The raw tensor containing detection boxes and their associated data. - orig_shape (tuple): The original image size as a tuple (height, width), used for normalization. - is_track (bool): Indicates whether tracking IDs are included in the box data. + This class provides functionality for handling detection boxes, including their coordinates, confidence scores, + class labels, and optional tracking IDs. It supports various box formats and offers methods for easy manipulation + and conversion between different coordinate systems. Attributes: + data (torch.Tensor | numpy.ndarray): The raw tensor containing detection boxes and associated data. + orig_shape (Tuple[int, int]): The original image dimensions (height, width). + is_track (bool): Indicates whether tracking IDs are included in the box data. xyxy (torch.Tensor | numpy.ndarray): Boxes in [x1, y1, x2, y2] format. conf (torch.Tensor | numpy.ndarray): Confidence scores for each box. cls (torch.Tensor | numpy.ndarray): Class labels for each box. - id (torch.Tensor | numpy.ndarray, optional): Tracking IDs for each box, if available. - xywh (torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format, calculated on demand. - xyxyn (torch.Tensor | numpy.ndarray): Normalized [x1, y1, x2, y2] boxes, relative to `orig_shape`. - xywhn (torch.Tensor | numpy.ndarray): Normalized [x, y, width, height] boxes, relative to `orig_shape`. + id (torch.Tensor | numpy.ndarray): Tracking IDs for each box (if available). + xywh (torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format. + xyxyn (torch.Tensor | numpy.ndarray): Normalized [x1, y1, x2, y2] boxes relative to orig_shape. + xywhn (torch.Tensor | numpy.ndarray): Normalized [x, y, width, height] boxes relative to orig_shape. Methods: - cpu(): Moves the boxes to CPU memory. - numpy(): Converts the boxes to a numpy array format. - cuda(): Moves the boxes to CUDA (GPU) memory. - to(device, dtype=None): Moves the boxes to the specified device. + cpu(): Returns a copy of the object with all tensors on CPU memory. + numpy(): Returns a copy of the object with all tensors as numpy arrays. + cuda(): Returns a copy of the object with all tensors on GPU memory. + to(*args, **kwargs): Returns a copy of the object with tensors on specified device and dtype. 
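+
+    Notes:
+        Since `Boxes` inherits the indexing and iteration behavior of `BaseTensor`, individual detections can
+        be inspected one at a time, e.g. (assuming `results` from a prior prediction):
+
+        >>> for box in results[0].boxes:
+        ...     print(box.xyxy, box.conf, box.cls)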
+ + Examples: + >>> import torch + >>> boxes_data = torch.tensor([[100, 50, 150, 100, 0.9, 0], [200, 150, 300, 250, 0.8, 1]]) + >>> orig_shape = (480, 640) # height, width + >>> boxes = Boxes(boxes_data, orig_shape) + >>> print(boxes.xyxy) + >>> print(boxes.conf) + >>> print(boxes.cls) + >>> print(boxes.xywhn) """ def __init__(self, boxes, orig_shape) -> None: """ Initialize the Boxes class with detection box data and the original image shape. - Args: - boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape (num_boxes, 6) - or (num_boxes, 7). Columns should contain [x1, y1, x2, y2, confidence, class, (optional) track_id]. - The track ID column is included if present. - orig_shape (tuple): The original image shape as (height, width). Used for normalization. + This class manages detection boxes, providing easy access and manipulation of box coordinates, + confidence scores, class identifiers, and optional tracking IDs. It supports multiple formats + for box coordinates, including both absolute and normalized forms. - Returns: - (None) + Args: + boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape + (num_boxes, 6) or (num_boxes, 7). Columns should contain + [x1, y1, x2, y2, confidence, class, (optional) track_id]. + orig_shape (Tuple[int, int]): The original image shape as (height, width). Used for normalization. + + Attributes: + data (torch.Tensor): The raw tensor containing detection boxes and their associated data. + orig_shape (Tuple[int, int]): The original image size, used for normalization. + is_track (bool): Indicates whether tracking IDs are included in the box data. + + Examples: + >>> import torch + >>> boxes = torch.tensor([[100, 50, 150, 100, 0.9, 0]]) + >>> orig_shape = (480, 640) + >>> detection_boxes = Boxes(boxes, orig_shape) + >>> print(detection_boxes.xyxy) + tensor([[100., 50., 150., 100.]]) """ if boxes.ndim == 1: boxes = boxes[None, :] @@ -563,34 +897,119 @@ class Boxes(BaseTensor): @property def xyxy(self): - """Returns bounding boxes in [x1, y1, x2, y2] format.""" + """ + Returns bounding boxes in [x1, y1, x2, y2] format. + + Returns: + (torch.Tensor | numpy.ndarray): A tensor or numpy array of shape (n, 4) containing bounding box + coordinates in [x1, y1, x2, y2] format, where n is the number of boxes. + + Examples: + >>> results = model('image.jpg') + >>> boxes = results[0].boxes + >>> xyxy = boxes.xyxy + >>> print(xyxy) + """ return self.data[:, :4] @property def conf(self): - """Returns the confidence scores for each detection box.""" + """ + Returns the confidence scores for each detection box. + + Returns: + (torch.Tensor | numpy.ndarray): A 1D tensor or array containing confidence scores for each detection, + with shape (N,) where N is the number of detections. + + Examples: + >>> boxes = Boxes(torch.tensor([[10, 20, 30, 40, 0.9, 0]]), orig_shape=(100, 100)) + >>> conf_scores = boxes.conf + >>> print(conf_scores) + tensor([0.9000]) + """ return self.data[:, -2] @property def cls(self): - """Class ID tensor representing category predictions for each bounding box.""" + """ + Returns the class ID tensor representing category predictions for each bounding box. + + Returns: + (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the class IDs for each detection box. + The shape is (N,), where N is the number of boxes. 
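+
+        Notes:
+            Class IDs are stored as floats in the underlying data tensor; cast them to int before looking up
+            class names, e.g. (assuming `results` from a prior prediction):
+
+            >>> for c in results[0].boxes.cls:
+            ...     print(results[0].names[int(c)])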
+
+        Examples:
+            >>> results = model('image.jpg')
+            >>> boxes = results[0].boxes
+            >>> class_ids = boxes.cls
+            >>> print(class_ids)  # tensor([0., 2., 1.])
+        """
         return self.data[:, -1]
 
     @property
     def id(self):
-        """Return the tracking IDs for each box if available."""
+        """
+        Returns the tracking IDs for each detection box if available.
+
+        Returns:
+            (torch.Tensor | None): A tensor containing tracking IDs for each box if tracking is enabled,
+                otherwise None. Shape is (N,) where N is the number of boxes.
+
+        Examples:
+            >>> results = model.track('path/to/video.mp4')
+            >>> for result in results:
+            ...     boxes = result.boxes
+            ...     if boxes.is_track:
+            ...         track_ids = boxes.id
+            ...         print(f"Tracking IDs: {track_ids}")
+            ...     else:
+            ...         print("Tracking is not enabled for these boxes.")
+
+        Notes:
+            - This property returns None when tracking is not enabled (i.e., when `is_track` is False).
+            - The tracking IDs are typically used to associate detections across multiple frames in video analysis.
+        """
         return self.data[:, -3] if self.is_track else None
 
     @property
     @lru_cache(maxsize=2)  # maxsize 1 should suffice
     def xywh(self):
-        """Returns boxes in [x, y, width, height] format."""
+        """
+        Convert bounding boxes from [x1, y1, x2, y2] format to [x, y, width, height] format.
+
+        Returns:
+            (torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format, where x, y are the coordinates of
+                the center of the bounding box, width, height are the dimensions of the bounding box and the
+                shape of the returned tensor is (N, 4), where N is the number of boxes.
+
+        Examples:
+            >>> boxes = Boxes(torch.tensor([[100, 50, 150, 100, 0.9, 0], [200, 150, 300, 250, 0.8, 1]]),
+            ...               orig_shape=(480, 640))
+            >>> xywh = boxes.xywh
+            >>> print(xywh)
+            tensor([[125.0000,  75.0000,  50.0000,  50.0000],
+                    [250.0000, 200.0000, 100.0000, 100.0000]])
+        """
         return ops.xyxy2xywh(self.xyxy)
 
     @property
     @lru_cache(maxsize=2)
     def xyxyn(self):
-        """Normalize box coordinates to [x1, y1, x2, y2] relative to the original image size."""
+        """
+        Returns normalized bounding box coordinates relative to the original image size.
+
+        This property calculates and returns the bounding box coordinates in [x1, y1, x2, y2] format,
+        normalized to the range [0, 1] based on the original image dimensions.
+
+        Returns:
+            (torch.Tensor | numpy.ndarray): Normalized bounding box coordinates with shape (N, 4), where N is
+                the number of boxes. Each row contains [x1, y1, x2, y2] values normalized to [0, 1].
+
+        Examples:
+            >>> boxes = Boxes(torch.tensor([[100, 50, 300, 400, 0.9, 0]]), orig_shape=(480, 640))
+            >>> normalized = boxes.xyxyn
+            >>> print(normalized)
+            tensor([[0.1562, 0.1042, 0.4688, 0.8333]])
+        """
         xyxy = self.xyxy.clone() if isinstance(self.xyxy, torch.Tensor) else np.copy(self.xyxy)
         xyxy[..., [0, 2]] /= self.orig_shape[1]
         xyxy[..., [1, 3]] /= self.orig_shape[0]
         return xyxy
 
     @property
     @lru_cache(maxsize=2)
     def xywhn(self):
-        """Returns normalized bounding boxes in [x, y, width, height] format."""
+        """
+        Returns normalized bounding boxes in [x, y, width, height] format.
+
+        This property calculates and returns the normalized bounding box coordinates in the format
+        [x_center, y_center, width, height], where all values are relative to the original image dimensions.
+
+        Returns:
+            (torch.Tensor | numpy.ndarray): Normalized bounding boxes with shape (N, 4), where N is the
+                number of boxes. Each row contains [x_center, y_center, width, height] values normalized
+                to [0, 1] based on the original image dimensions.
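+
+        Notes:
+            This matches the box format used in YOLO detection label files. A sketch for emitting one label
+            line per box (assuming `boxes` holds the `Boxes` object for a single image):
+
+            >>> for c, b in zip(boxes.cls, boxes.xywhn):
+            ...     print(int(c), *(f'{v:.6f}' for v in b))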
+ + Examples: + >>> boxes = Boxes(torch.tensor([[100, 50, 150, 100, 0.9, 0]]), orig_shape=(480, 640)) + >>> normalized = boxes.xywhn + >>> print(normalized) + tensor([[0.1953, 0.1562, 0.0781, 0.1042]]) + """ xywh = ops.xyxy2xywh(self.xyxy) xywh[..., [0, 2]] /= self.orig_shape[1] xywh[..., [1, 3]] /= self.orig_shape[0] @@ -610,19 +1045,44 @@ class Masks(BaseTensor): """ A class for storing and manipulating detection masks. + This class extends BaseTensor and provides functionality for handling segmentation masks, + including methods for converting between pixel and normalized coordinates. + Attributes: - xy (list): A list of segments in pixel coordinates. - xyn (list): A list of normalized segments. + data (torch.Tensor | numpy.ndarray): The raw tensor or array containing mask data. + orig_shape (tuple): Original image shape in (height, width) format. + xy (List[numpy.ndarray]): A list of segments in pixel coordinates. + xyn (List[numpy.ndarray]): A list of normalized segments. Methods: - cpu(): Returns the masks tensor on CPU memory. - numpy(): Returns the masks tensor as a numpy array. - cuda(): Returns the masks tensor on GPU memory. - to(device, dtype): Returns the masks tensor with the specified device and dtype. + cpu(): Returns a copy of the Masks object with the mask tensor on CPU memory. + numpy(): Returns a copy of the Masks object with the mask tensor as a numpy array. + cuda(): Returns a copy of the Masks object with the mask tensor on GPU memory. + to(*args, **kwargs): Returns a copy of the Masks object with the mask tensor on specified device and dtype. + + Examples: + >>> masks_data = torch.rand(1, 160, 160) + >>> orig_shape = (720, 1280) + >>> masks = Masks(masks_data, orig_shape) + >>> pixel_coords = masks.xy + >>> normalized_coords = masks.xyn """ def __init__(self, masks, orig_shape) -> None: - """Initializes the Masks class with a masks tensor and original image shape.""" + """ + Initialize the Masks class with detection mask data and the original image shape. + + Args: + masks (torch.Tensor | np.ndarray): Detection masks with shape (num_masks, height, width). + orig_shape (tuple): The original image shape as (height, width). Used for normalization. + + Examples: + >>> import torch + >>> from ultralytics.engine.results import Masks + >>> masks = torch.rand(10, 160, 160) # 10 masks of 160x160 resolution + >>> orig_shape = (720, 1280) # Original image shape + >>> mask_obj = Masks(masks, orig_shape) + """ if masks.ndim == 2: masks = masks[None, :] super().__init__(masks, orig_shape) @@ -630,7 +1090,23 @@ class Masks(BaseTensor): @property @lru_cache(maxsize=1) def xyn(self): - """Return normalized xy-coordinates of the segmentation masks.""" + """ + Returns normalized xy-coordinates of the segmentation masks. + + This property calculates and caches the normalized xy-coordinates of the segmentation masks. The coordinates + are normalized relative to the original image shape. + + Returns: + (List[numpy.ndarray]): A list of numpy arrays, where each array contains the normalized xy-coordinates + of a single segmentation mask. Each array has shape (N, 2), where N is the number of points in the + mask contour. 
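+
+        Notes:
+            Normalized coordinates are resolution-independent, which makes them the form used in YOLO
+            segmentation label files. For example (assuming `masks` from a prior prediction), one polygon
+            per line could be written as:
+
+            >>> for seg in masks.xyn:
+            ...     print(' '.join(f'{v:.6f}' for v in seg.reshape(-1)))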
+ + Examples: + >>> results = model('image.jpg') + >>> masks = results[0].masks + >>> normalized_coords = masks.xyn + >>> print(normalized_coords[0]) # Normalized coordinates of the first mask + """ return [ ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=True) for x in ops.masks2segments(self.data) @@ -639,7 +1115,24 @@ class Masks(BaseTensor): @property @lru_cache(maxsize=1) def xy(self): - """Returns the [x, y] normalized mask coordinates for each segment in the mask tensor.""" + """ + Returns the [x, y] pixel coordinates for each segment in the mask tensor. + + This property calculates and returns a list of pixel coordinates for each segmentation mask in the + Masks object. The coordinates are scaled to match the original image dimensions. + + Returns: + (List[numpy.ndarray]): A list of numpy arrays, where each array contains the [x, y] pixel + coordinates for a single segmentation mask. Each array has shape (N, 2), where N is the + number of points in the segment. + + Examples: + >>> results = model('image.jpg') + >>> masks = results[0].masks + >>> xy_coords = masks.xy + >>> print(len(xy_coords)) # Number of masks + >>> print(xy_coords[0].shape) # Shape of first mask's coordinates + """ return [ ops.scale_coords(self.data.shape[1:], x, self.orig_shape, normalize=False) for x in ops.masks2segments(self.data) @@ -650,21 +1143,53 @@ class Keypoints(BaseTensor): """ A class for storing and manipulating detection keypoints. - Attributes - xy (torch.Tensor): A collection of keypoints containing x, y coordinates for each detection. - xyn (torch.Tensor): A normalized version of xy with coordinates in the range [0, 1]. - conf (torch.Tensor): Confidence values associated with keypoints if available, otherwise None. + This class encapsulates functionality for handling keypoint data, including coordinate manipulation, + normalization, and confidence values. + + Attributes: + data (torch.Tensor): The raw tensor containing keypoint data. + orig_shape (Tuple[int, int]): The original image dimensions (height, width). + has_visible (bool): Indicates whether visibility information is available for keypoints. + xy (torch.Tensor): Keypoint coordinates in [x, y] format. + xyn (torch.Tensor): Normalized keypoint coordinates in [x, y] format, relative to orig_shape. + conf (torch.Tensor): Confidence values for each keypoint, if available. Methods: cpu(): Returns a copy of the keypoints tensor on CPU memory. numpy(): Returns a copy of the keypoints tensor as a numpy array. cuda(): Returns a copy of the keypoints tensor on GPU memory. - to(device, dtype): Returns a copy of the keypoints tensor with the specified device and dtype. + to(*args, **kwargs): Returns a copy of the keypoints tensor with specified device and dtype. 
+
+    Examples:
+        >>> import torch
+        >>> from ultralytics.engine.results import Keypoints
+        >>> keypoints_data = torch.rand(1, 17, 3)  # 1 detection, 17 keypoints, (x, y, conf)
+        >>> orig_shape = (480, 640)  # Original image shape (height, width)
+        >>> keypoints = Keypoints(keypoints_data, orig_shape)
+        >>> print(keypoints.xy.shape)  # Access xy coordinates
+        >>> print(keypoints.conf)  # Access confidence values
+        >>> keypoints_cpu = keypoints.cpu()  # Move keypoints to CPU
     """
 
     @smart_inference_mode()  # avoid keypoints < conf in-place error
     def __init__(self, keypoints, orig_shape) -> None:
-        """Initializes the Keypoints object with detection keypoints and original image dimensions."""
+        """
+        Initializes the Keypoints object with detection keypoints and original image dimensions.
+
+        This method processes the input keypoints tensor, handling both 2D and 3D formats. For 3D tensors
+        (x, y, confidence), it masks out low-confidence keypoints by setting their coordinates to zero.
+
+        Args:
+            keypoints (torch.Tensor): A tensor containing keypoint data. Shape can be either:
+                - (num_objects, num_keypoints, 2) for x, y coordinates only
+                - (num_objects, num_keypoints, 3) for x, y coordinates and confidence scores
+            orig_shape (Tuple[int, int]): The original image dimensions (height, width).
+
+        Examples:
+            >>> kpts = torch.rand(1, 17, 3)  # 1 object, 17 keypoints (COCO format), x,y,conf
+            >>> orig_shape = (720, 1280)  # Original image height, width
+            >>> keypoints = Keypoints(kpts, orig_shape)
+        """
         if keypoints.ndim == 2:
             keypoints = keypoints[None, :]
         if keypoints.shape[2] == 3:  # x, y, conf
@@ -676,13 +1201,44 @@
     @property
     @lru_cache(maxsize=1)
     def xy(self):
-        """Returns x, y coordinates of keypoints."""
+        """
+        Returns x, y coordinates of keypoints.
+
+        Returns:
+            (torch.Tensor): A tensor containing the x, y coordinates of keypoints with shape (N, K, 2), where N is
+                the number of detections and K is the number of keypoints per detection.
+
+        Examples:
+            >>> results = model('image.jpg')
+            >>> keypoints = results[0].keypoints
+            >>> xy = keypoints.xy
+            >>> print(xy.shape)  # (N, K, 2)
+            >>> print(xy[0])  # x, y coordinates of keypoints for first detection
+
+        Notes:
+            - The returned coordinates are in pixel units relative to the original image dimensions.
+            - If keypoints were initialized with confidence values, keypoints with confidence below 0.5 have
+              their coordinates set to zero rather than being removed.
+            - This property uses LRU caching to improve performance on repeated access.
+        """
         return self.data[..., :2]
 
     @property
     @lru_cache(maxsize=1)
     def xyn(self):
-        """Returns normalized coordinates (x, y) of keypoints relative to the original image size."""
+        """
+        Returns normalized coordinates (x, y) of keypoints relative to the original image size.
+
+        Returns:
+            (torch.Tensor | numpy.ndarray): A tensor or array of shape (N, K, 2) containing normalized keypoint
+                coordinates, where N is the number of instances, K is the number of keypoints, and the last
+                dimension contains [x, y] values in the range [0, 1].
+ + Examples: + >>> keypoints = Keypoints(torch.rand(1, 17, 2), orig_shape=(480, 640)) + >>> normalized_kpts = keypoints.xyn + >>> print(normalized_kpts.shape) + torch.Size([1, 17, 2]) + """ xy = self.xy.clone() if isinstance(self.xy, torch.Tensor) else np.copy(self.xy) xy[..., 0] /= self.orig_shape[1] xy[..., 1] /= self.orig_shape[0] @@ -691,53 +1247,160 @@ class Keypoints(BaseTensor): @property @lru_cache(maxsize=1) def conf(self): - """Returns confidence values for each keypoint.""" + """ + Returns confidence values for each keypoint. + + Returns: + (torch.Tensor | None): A tensor containing confidence scores for each keypoint if available, + otherwise None. Shape is (num_detections, num_keypoints) for batched data or (num_keypoints,) + for single detection. + + Examples: + >>> keypoints = Keypoints(torch.rand(1, 17, 3), orig_shape=(640, 640)) # 1 detection, 17 keypoints + >>> conf = keypoints.conf + >>> print(conf.shape) # torch.Size([1, 17]) + """ return self.data[..., 2] if self.has_visible else None class Probs(BaseTensor): """ - A class for storing and manipulating classification predictions. + A class for storing and manipulating classification probabilities. - Attributes - top1 (int): Index of the top 1 class. - top5 (list[int]): Indices of the top 5 classes. - top1conf (torch.Tensor): Confidence of the top 1 class. - top5conf (torch.Tensor): Confidences of the top 5 classes. + This class extends BaseTensor and provides methods for accessing and manipulating + classification probabilities, including top-1 and top-5 predictions. + + Attributes: + data (torch.Tensor | numpy.ndarray): The raw tensor or array containing classification probabilities. + orig_shape (tuple | None): The original image shape as (height, width). Not used in this class. + top1 (int): Index of the class with the highest probability. + top5 (List[int]): Indices of the top 5 classes by probability. + top1conf (torch.Tensor | numpy.ndarray): Confidence score of the top 1 class. + top5conf (torch.Tensor | numpy.ndarray): Confidence scores of the top 5 classes. Methods: - cpu(): Returns a copy of the probs tensor on CPU memory. - numpy(): Returns a copy of the probs tensor as a numpy array. - cuda(): Returns a copy of the probs tensor on GPU memory. - to(): Returns a copy of the probs tensor with the specified device and dtype. + cpu(): Returns a copy of the probabilities tensor on CPU memory. + numpy(): Returns a copy of the probabilities tensor as a numpy array. + cuda(): Returns a copy of the probabilities tensor on GPU memory. + to(*args, **kwargs): Returns a copy of the probabilities tensor with specified device and dtype. + + Examples: + >>> probs = torch.tensor([0.1, 0.3, 0.6]) + >>> p = Probs(probs) + >>> print(p.top1) + 2 + >>> print(p.top5) + [2, 1, 0] + >>> print(p.top1conf) + tensor(0.6000) + >>> print(p.top5conf) + tensor([0.6000, 0.3000, 0.1000]) """ def __init__(self, probs, orig_shape=None) -> None: - """Initialize Probs with classification probabilities and optional original image shape.""" + """ + Initialize the Probs class with classification probabilities. + + This class stores and manages classification probabilities, providing easy access to top predictions and their + confidences. + + Args: + probs (torch.Tensor | np.ndarray): A 1D tensor or array of classification probabilities. + orig_shape (tuple | None): The original image shape as (height, width). Not used in this class but kept for + consistency with other result classes. 
+ + Attributes: + data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities. + top1 (int): Index of the top 1 class. + top5 (List[int]): Indices of the top 5 classes. + top1conf (torch.Tensor | np.ndarray): Confidence of the top 1 class. + top5conf (torch.Tensor | np.ndarray): Confidences of the top 5 classes. + + Examples: + >>> import torch + >>> probs = torch.tensor([0.1, 0.3, 0.2, 0.4]) + >>> p = Probs(probs) + >>> print(p.top1) + 3 + >>> print(p.top1conf) + tensor(0.4000) + >>> print(p.top5) + [3, 1, 2, 0] + """ super().__init__(probs, orig_shape) @property @lru_cache(maxsize=1) def top1(self): - """Return the index of the class with the highest probability.""" + """ + Returns the index of the class with the highest probability. + + Returns: + (int): Index of the class with the highest probability. + + Examples: + >>> probs = Probs(torch.tensor([0.1, 0.3, 0.6])) + >>> probs.top1 + 2 + """ return int(self.data.argmax()) @property @lru_cache(maxsize=1) def top5(self): - """Return the indices of the top 5 class probabilities.""" + """ + Returns the indices of the top 5 class probabilities. + + Returns: + (List[int]): A list containing the indices of the top 5 class probabilities, sorted in descending order. + + Examples: + >>> probs = Probs(torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5])) + >>> print(probs.top5) + [4, 3, 2, 1, 0] + """ return (-self.data).argsort(0)[:5].tolist() # this way works with both torch and numpy. @property @lru_cache(maxsize=1) def top1conf(self): - """Retrieves the confidence score of the highest probability class.""" + """ + Returns the confidence score of the highest probability class. + + This property retrieves the confidence score (probability) of the class with the highest predicted probability + from the classification results. + + Returns: + (torch.Tensor | numpy.ndarray): A tensor containing the confidence score of the top 1 class. + + Examples: + >>> results = model('image.jpg') # classify an image + >>> probs = results[0].probs # get classification probabilities + >>> top1_confidence = probs.top1conf # get confidence of top 1 class + >>> print(f"Top 1 class confidence: {top1_confidence.item():.4f}") + """ return self.data[self.top1] @property @lru_cache(maxsize=1) def top5conf(self): - """Returns confidence scores for the top 5 classification predictions.""" + """ + Returns confidence scores for the top 5 classification predictions. + + This property retrieves the confidence scores corresponding to the top 5 class probabilities + predicted by the model. It provides a quick way to access the most likely class predictions + along with their associated confidence levels. + + Returns: + (torch.Tensor | numpy.ndarray): A tensor or array containing the confidence scores for the + top 5 predicted classes, sorted in descending order of probability. + + Examples: + >>> results = model('image.jpg') + >>> probs = results[0].probs + >>> top5_conf = probs.top5conf + >>> print(top5_conf) # Prints confidence scores for top 5 classes + """ return self.data[self.top5] @@ -745,31 +1408,63 @@ class OBB(BaseTensor): """ A class for storing and manipulating Oriented Bounding Boxes (OBB). - Args: - boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes, - with shape (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values. - If present, the third last column contains track IDs, and the fifth column from the left contains rotation. 
- orig_shape (tuple): Original image size, in the format (height, width). - - Attributes - xywhr (torch.Tensor | numpy.ndarray): The boxes in [x_center, y_center, width, height, rotation] format. - conf (torch.Tensor | numpy.ndarray): The confidence values of the boxes. - cls (torch.Tensor | numpy.ndarray): The class values of the boxes. - id (torch.Tensor | numpy.ndarray): The track IDs of the boxes (if available). - xyxyxyxyn (torch.Tensor | numpy.ndarray): The rotated boxes in xyxyxyxy format normalized by orig image size. - xyxyxyxy (torch.Tensor | numpy.ndarray): The rotated boxes in xyxyxyxy format. - xyxy (torch.Tensor | numpy.ndarray): The horizontal boxes in xyxyxyxy format. - data (torch.Tensor): The raw OBB tensor (alias for `boxes`). + This class provides functionality to handle oriented bounding boxes, including conversion between + different formats, normalization, and access to various properties of the boxes. + + Attributes: + data (torch.Tensor): The raw OBB tensor containing box coordinates and associated data. + orig_shape (tuple): Original image size as (height, width). + is_track (bool): Indicates whether tracking IDs are included in the box data. + xywhr (torch.Tensor | numpy.ndarray): Boxes in [x_center, y_center, width, height, rotation] format. + conf (torch.Tensor | numpy.ndarray): Confidence scores for each box. + cls (torch.Tensor | numpy.ndarray): Class labels for each box. + id (torch.Tensor | numpy.ndarray): Tracking IDs for each box, if available. + xyxyxyxy (torch.Tensor | numpy.ndarray): Boxes in 8-point [x1, y1, x2, y2, x3, y3, x4, y4] format. + xyxyxyxyn (torch.Tensor | numpy.ndarray): Normalized 8-point coordinates relative to orig_shape. + xyxy (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in [x1, y1, x2, y2] format. Methods: - cpu(): Move the object to CPU memory. - numpy(): Convert the object to a numpy array. - cuda(): Move the object to CUDA memory. - to(*args, **kwargs): Move the object to the specified device. + cpu(): Returns a copy of the OBB object with all tensors on CPU memory. + numpy(): Returns a copy of the OBB object with all tensors as numpy arrays. + cuda(): Returns a copy of the OBB object with all tensors on GPU memory. + to(*args, **kwargs): Returns a copy of the OBB object with tensors on specified device and dtype. + + Examples: + >>> boxes = torch.tensor([[100, 50, 150, 100, 30, 0.9, 0]]) # xywhr, conf, cls + >>> obb = OBB(boxes, orig_shape=(480, 640)) + >>> print(obb.xyxyxyxy) + >>> print(obb.conf) + >>> print(obb.cls) """ def __init__(self, boxes, orig_shape) -> None: - """Initialize an OBB instance with oriented bounding box data and original image shape.""" + """ + Initialize an OBB (Oriented Bounding Box) instance with oriented bounding box data and original image shape. + + This class stores and manipulates Oriented Bounding Boxes (OBB) for object detection tasks. It provides + various properties and methods to access and transform the OBB data. + + Args: + boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes, + with shape (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values. + If present, the third last column contains track IDs, and the fifth column contains rotation. + orig_shape (Tuple[int, int]): Original image size, in the format (height, width). + + Attributes: + data (torch.Tensor | numpy.ndarray): The raw OBB tensor. + orig_shape (Tuple[int, int]): The original image shape. 
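[Editor's note: the 7-versus-8 column convention documented above can be checked with hand-built tensors that mirror, without importing, the class's own slicing. A minimal sketch with synthetic values; not part of the patch:]

```python
import torch

# Assumed layout: xywhr (5 columns) + optional track ID + conf + cls
det = torch.tensor([[100.0, 50.0, 150.0, 100.0, 0.3, 0.9, 0.0]])  # 7 cols, no tracking
trk = torch.tensor([[100.0, 50.0, 150.0, 100.0, 0.3, 1.0, 0.9, 0.0]])  # 8 cols, with track ID

for boxes in (det, trk):
    n = boxes.shape[-1]
    assert n in {7, 8}, f"expected 7 or 8 values per box, got {n}"  # same check as __init__
    is_track = n == 8
    xywhr, conf, cls = boxes[:, :5], boxes[:, -2], boxes[:, -1]  # same slices as the properties
    track_id = boxes[:, -3] if is_track else None
    print(xywhr.shape, float(conf), float(cls), track_id)
```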
+        is_track (bool): Whether the boxes include tracking IDs.
+
+        Raises:
+            AssertionError: If the number of values per box is not 7 or 8.
+
+        Examples:
+            >>> import torch
+            >>> boxes = torch.rand(3, 7)  # 3 boxes with 7 values each
+            >>> orig_shape = (640, 480)
+            >>> obb = OBB(boxes, orig_shape)
+            >>> print(obb.xywhr)  # Access the boxes in xywhr format
+        """
         if boxes.ndim == 1:
             boxes = boxes[None, :]
         n = boxes.shape[-1]
@@ -780,34 +1475,115 @@ class OBB(BaseTensor):
 
     @property
     def xywhr(self):
-        """Return boxes in [x_center, y_center, width, height, rotation] format."""
+        """
+        Returns boxes in [x_center, y_center, width, height, rotation] format.
+
+        Returns:
+            (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the oriented bounding boxes with format
+                [x_center, y_center, width, height, rotation]. The shape is (N, 5) where N is the number of boxes.
+
+        Examples:
+            >>> results = model('image.jpg')
+            >>> obb = results[0].obb
+            >>> xywhr = obb.xywhr
+            >>> print(xywhr.shape)
+            torch.Size([3, 5])
+        """
         return self.data[:, :5]
 
     @property
     def conf(self):
-        """Gets the confidence values of Oriented Bounding Boxes (OBBs)."""
+        """
+        Returns the confidence scores for Oriented Bounding Boxes (OBBs).
+
+        This property retrieves the confidence values associated with each OBB detection. The confidence score
+        represents the model's certainty in the detection.
+
+        Returns:
+            (torch.Tensor | numpy.ndarray): A tensor or numpy array of shape (N,) containing confidence scores
+                for N detections, where each score is in the range [0, 1].
+
+        Examples:
+            >>> results = model('image.jpg')
+            >>> obb_result = results[0].obb
+            >>> confidence_scores = obb_result.conf
+            >>> print(confidence_scores)
+        """
         return self.data[:, -2]
 
     @property
     def cls(self):
-        """Returns the class values of the oriented bounding boxes."""
+        """
+        Returns the class values of the oriented bounding boxes.
+
+        Returns:
+            (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the class values for each oriented
+                bounding box. The shape is (N,), where N is the number of boxes.
+
+        Examples:
+            >>> results = model('image.jpg')
+            >>> result = results[0]
+            >>> obb = result.obb
+            >>> class_values = obb.cls
+            >>> print(class_values)
+        """
         return self.data[:, -1]
 
     @property
     def id(self):
-        """Return the tracking IDs of the oriented bounding boxes (if available)."""
+        """
+        Returns the tracking IDs of the oriented bounding boxes (if available).
+
+        Returns:
+            (torch.Tensor | numpy.ndarray | None): A tensor or numpy array containing the tracking IDs for each
+                oriented bounding box. Returns None if tracking IDs are not available.
+
+        Examples:
+            >>> results = model.track('image.jpg')  # Run inference with tracking
+            >>> for result in results:
+            ...     if result.obb is not None:
+            ...         track_ids = result.obb.id
+            ...         if track_ids is not None:
+            ...             print(f"Tracking IDs: {track_ids}")
+        """
         return self.data[:, -3] if self.is_track else None
 
     @property
     @lru_cache(maxsize=2)
     def xyxyxyxy(self):
-        """Convert OBB format to 8-point (xyxyxyxy) coordinate format of shape (N, 4, 2) for rotated bounding boxes."""
+        """
+        Converts OBB format to 8-point (xyxyxyxy) coordinate format for rotated bounding boxes.
+
+        Returns:
+            (torch.Tensor | numpy.ndarray): Rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), where N is
+                the number of boxes. Each box is represented by 4 points (x, y), starting from the top-left corner and
+                moving clockwise.
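[Editor's note: tying the `xywhr`, `conf`, `cls`, and `id` properties above together, a typical consumer loop might look like the sketch below. The weights file 'yolov8n-obb.pt' and source 'video.mp4' are illustrative; `model.track` is the tracking entry point, which is what populates `.id`:]

```python
from ultralytics import YOLO

model = YOLO("yolov8n-obb.pt")
for result in model.track("video.mp4", stream=True):  # tracking populates obb.id
    obb = result.obb
    if obb is None or len(obb) == 0:
        continue
    for xywhr, conf, cls in zip(obb.xywhr, obb.conf, obb.cls):
        print(f"class={int(cls)} conf={float(conf):.2f} box={xywhr.tolist()}")
    if obb.id is not None:
        print("track IDs:", obb.id.tolist())
```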
+ + Examples: + >>> obb = OBB(torch.tensor([[100, 100, 50, 30, 0.5, 0.9, 0]]), orig_shape=(640, 640)) + >>> xyxyxyxy = obb.xyxyxyxy + >>> print(xyxyxyxy.shape) + torch.Size([1, 4, 2]) + """ return ops.xywhr2xyxyxyxy(self.xywhr) @property @lru_cache(maxsize=2) def xyxyxyxyn(self): - """Converts rotated bounding boxes to normalized xyxyxyxy format of shape (N, 4, 2).""" + """ + Converts rotated bounding boxes to normalized xyxyxyxy format. + + Returns: + (torch.Tensor | numpy.ndarray): Normalized rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), + where N is the number of boxes. Each box is represented by 4 points (x, y), normalized relative to + the original image dimensions. + + Examples: + >>> obb = OBB(torch.rand(10, 7), orig_shape=(640, 480)) # 10 random OBBs + >>> normalized_boxes = obb.xyxyxyxyn + >>> print(normalized_boxes.shape) + torch.Size([10, 4, 2]) + """ xyxyxyxyn = self.xyxyxyxy.clone() if isinstance(self.xyxyxyxy, torch.Tensor) else np.copy(self.xyxyxyxy) xyxyxyxyn[..., 0] /= self.orig_shape[1] xyxyxyxyn[..., 1] /= self.orig_shape[0] @@ -817,28 +1593,31 @@ class OBB(BaseTensor): @lru_cache(maxsize=2) def xyxy(self): """ - Convert the oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format (x1, y1, x2, y2). + Converts oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format. - Returns: - (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in xyxy format with shape (num_boxes, 4). - - Example: - ```python - import torch - from ultralytics import YOLO + This property calculates the minimal enclosing rectangle for each oriented bounding box and returns it in + xyxy format (x1, y1, x2, y2). This is useful for operations that require axis-aligned bounding boxes, such + as IoU calculation with non-rotated boxes. - model = YOLO('yolov8n.pt') - results = model('path/to/image.jpg') - for result in results: - obb = result.obb - if obb is not None: - xyxy_boxes = obb.xyxy - # Do something with xyxy_boxes - ``` + Returns: + (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in xyxy format with shape (N, 4), where N + is the number of boxes. Each row contains [x1, y1, x2, y2] coordinates. + + Examples: + >>> import torch + >>> from ultralytics import YOLO + >>> model = YOLO('yolov8n-obb.pt') + >>> results = model('path/to/image.jpg') + >>> for result in results: + ... obb = result.obb + ... if obb is not None: + ... xyxy_boxes = obb.xyxy + ... print(xyxy_boxes.shape) # (N, 4) - Note: - This method is useful to perform operations that require axis-aligned bounding boxes, such as IoU - calculation with non-rotated boxes. The conversion approximates the OBB by the minimal enclosing rectangle. + Notes: + - This method approximates the OBB by its minimal enclosing rectangle. + - The returned format is compatible with standard object detection metrics and visualization tools. + - The property uses caching to improve performance for repeated access. """ x = self.xyxyxyxy[..., 0] y = self.xyxyxyxy[..., 1]
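[Editor's note: the hunk above ends just as `xyxy` extracts the corner coordinates; the remainder of that property is a min/max reduction over the four rotated corners. That arithmetic can be verified in isolation with hand-made corner points, without calling `ops.xywhr2xyxyxyxy`. A minimal sketch; not part of the patch:]

```python
import torch

# One rotated box given as its 4 (x, y) corners, shape (N, 4, 2)
corners = torch.tensor([[[120.0, 60.0], [180.0, 90.0], [160.0, 150.0], [100.0, 120.0]]])

x, y = corners[..., 0], corners[..., 1]
xyxy = torch.stack([x.amin(-1), y.amin(-1), x.amax(-1), y.amax(-1)], dim=-1)
print(xyxy)  # tensor([[100., 60., 180., 150.]]) -> minimal enclosing axis-aligned box
```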