Merge branch 'main' into cli-info

cli-info
Burhan committed 3 days ago via GitHub
commit 05b9da824c
32 files changed:

  1. .github/workflows/ci.yaml (3 changes)
  2. .github/workflows/format.yml (2 changes)
  3. README.md (16 changes)
  4. README.zh-CN.md (14 changes)
  5. docs/build_docs.py (60 changes)
  6. docs/en/datasets/segment/carparts-seg.md (4 changes)
  7. docs/en/guides/model-evaluation-insights.md (11 changes)
  8. docs/en/guides/region-counting.md (83 changes)
  9. docs/en/help/contributing.md (124 changes)
  10. docs/en/integrations/albumentations.md (4 changes)
  11. docs/en/integrations/sony-imx500.md (2 changes)
  12. docs/en/macros/train-args.md (1 change)
  13. docs/en/models/sam-2.md (11 changes)
  14. docs/en/modes/benchmark.md (4 changes)
  15. docs/en/tasks/index.md (1 change)
  16. docs/en/tasks/pose.md (32 changes)
  17. docs/en/usage/simple-utilities.md (85 changes)
  18. docs/overrides/assets/favicon.ico (binary)
  19. examples/YOLO-Series-ONNXRuntime-Rust/README.md (8 changes)
  20. examples/YOLOv8-LibTorch-CPP-Inference/main.cc (1 change)
  21. mkdocs.yml (2 changes)
  22. ultralytics/__init__.py (2 changes)
  23. ultralytics/cfg/__init__.py (1 change)
  24. ultralytics/engine/model.py (29 changes)
  25. ultralytics/models/yolo/classify/predict.py (2 changes)
  26. ultralytics/models/yolo/detect/train.py (2 changes)
  27. ultralytics/nn/modules/head.py (2 changes)
  28. ultralytics/solutions/ai_gym.py (2 changes)
  29. ultralytics/solutions/solutions.py (6 changes)
  30. ultralytics/utils/ops.py (5 changes)
  31. ultralytics/utils/plotting.py (64 changes)
  32. ultralytics/utils/torch_utils.py (16 changes)

@ -214,7 +214,8 @@ jobs:
- uses: actions/checkout@v4
- uses: astral-sh/setup-uv@v4
- name: Install requirements
run: uv pip install --system . pytest-cov
shell: bash # for Windows compatibility
run: uv pip install --system -e . pytest-cov
- name: Check environment
run: |
yolo checks

@ -35,7 +35,7 @@ jobs:
If this is a custom training ❓ Question, please provide as much information as possible, including dataset image examples and training logs, and verify you are following our [Tips for Best Training Results](https://docs.ultralytics.com/guides/model-training-tips/).
Join the Ultralytics community where it suits you best. For real-time chat, head to [Discord](https://ultralytics.com/discord) 🎧. Prefer in-depth discussions? Check out [Discourse](https://community.ultralytics.com). Or dive into threads on our [Subreddit](https://reddit.com/r/ultralytics) to share knowledge with the community.
Join the Ultralytics community where it suits you best. For real-time chat, head to [Discord](https://discord.com/invite/ultralytics) 🎧. Prefer in-depth discussions? Check out [Discourse](https://community.ultralytics.com). Or dive into threads on our [Subreddit](https://reddit.com/r/Ultralytics) to share knowledge with the community.
## Upgrade

@ -80,7 +80,7 @@ YOLO may be used directly in the Command Line Interface (CLI) with a `yolo` comm
yolo predict model=yolo11n.pt source='https://ultralytics.com/images/bus.jpg'
```
`yolo` can be used for a variety of tasks and modes and accepts additional arguments, i.e. `imgsz=640`. See the YOLO [CLI Docs](https://docs.ultralytics.com/usage/cli/) for examples.
`yolo` can be used for a variety of tasks and modes and accepts additional arguments, e.g. `imgsz=640`. See the YOLO [CLI Docs](https://docs.ultralytics.com/usage/cli/) for examples.
### Python
@ -117,11 +117,13 @@ See YOLO [Python Docs](https://docs.ultralytics.com/usage/python/) for more exam
## <div align="center">Models</div>
YOLO11 [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/) and [Pose](https://docs.ultralytics.com/tasks/pose/) models pretrained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset are available here, as well as YOLO11 [Classify](https://docs.ultralytics.com/tasks/classify/) models pretrained on the [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) dataset. [Track](https://docs.ultralytics.com/modes/track/) mode is available for all Detect, Segment and Pose models.
YOLO11 [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/) and [Pose](https://docs.ultralytics.com/tasks/pose/) models pretrained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset are available here, as well as YOLO11 [Classify](https://docs.ultralytics.com/tasks/classify/) models pretrained on the [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) dataset. [Track](https://docs.ultralytics.com/modes/track/) mode is available for all Detect, Segment and Pose models. All [Models](https://docs.ultralytics.com/models/) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use.
<img width="100%" src="https://raw.githubusercontent.com/ultralytics/assets/main/im/banner-tasks.png" alt="Ultralytics YOLO supported tasks">
All [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use.
<a href="https://docs.ultralytics.com/tasks/" target="_blank">
<img width="100%" src="https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov8-tasks-banner.avif" alt="Ultralytics YOLO supported tasks">
</a>
<br>
<br>
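As a quick illustration of the automatic download described above, passing a pretrained checkpoint name to the Python API is enough to fetch it on first use (a minimal sketch; the sample image URL is the one used elsewhere in this README):

```python
from ultralytics import YOLO

# First use downloads yolo11n.pt from the latest Ultralytics assets release, then caches it locally
model = YOLO("yolo11n.pt")
results = model("https://ultralytics.com/images/bus.jpg")  # run a quick inference to confirm the weights loaded
```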
<details open><summary>Detection (COCO)</summary>
@ -212,9 +214,9 @@ See [OBB Docs](https://docs.ultralytics.com/tasks/obb/) for usage examples with
Our key integrations with leading AI platforms extend the functionality of Ultralytics' offerings, enhancing tasks like dataset labeling, training, visualization, and model management. Discover how Ultralytics, in collaboration with [W&B](https://docs.wandb.ai/guides/integrations/ultralytics/), [Comet](https://bit.ly/yolov8-readme-comet), [Roboflow](https://roboflow.com/?ref=ultralytics) and [OpenVINO](https://docs.ultralytics.com/integrations/openvino/), can optimize your AI workflow.
<br>
<a href="https://www.ultralytics.com/hub" target="_blank">
<img width="100%" src="https://github.com/ultralytics/assets/raw/main/yolov8/banner-integrations.png" alt="Ultralytics active learning integrations"></a>
<img width="100%" src="https://github.com/ultralytics/assets/raw/main/yolov8/banner-integrations.png" alt="Ultralytics active learning integrations">
</a>
<br>
<br>

@ -117,11 +117,13 @@ path = model.export(format="onnx")  # return the path of the exported model
## <div align="center">Models</div>
YOLO11 [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/) and [Pose](https://docs.ultralytics.com/tasks/pose/) models pretrained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset are available here, as well as YOLO11 [Classify](https://docs.ultralytics.com/tasks/classify/) models pretrained on the [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) dataset. [Track](https://docs.ultralytics.com/modes/track/) mode is supported for all Detect, Segment and Pose models.
YOLO11 [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://docs.ultralytics.com/tasks/segment/) and [Pose](https://docs.ultralytics.com/tasks/pose/) models pretrained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset are available here, as well as YOLO11 [Classify](https://docs.ultralytics.com/tasks/classify/) models pretrained on the [ImageNet](https://docs.ultralytics.com/datasets/classify/imagenet/) dataset. [Track](https://docs.ultralytics.com/modes/track/) mode is supported for all Detect, Segment and Pose models. All [Models](https://docs.ultralytics.com/models/) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use.
<img width="100%" src="https://raw.githubusercontent.com/ultralytics/assets/main/im/banner-tasks.png" alt="Ultralytics YOLO supported tasks">
All [Models](https://github.com/ultralytics/ultralytics/tree/main/ultralytics/cfg/models) download automatically from the latest Ultralytics [release](https://github.com/ultralytics/assets/releases) on first use.
<a href="https://docs.ultralytics.com/tasks/" target="_blank">
<img width="100%" src="https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov8-tasks-banner.avif" alt="Ultralytics YOLO supported tasks">
</a>
<br>
<br>
<details open><summary>Detection (COCO)</summary>
@ -212,9 +214,9 @@ YOLO11 [Detect](https://docs.ultralytics.com/tasks/detect/), [Segment](https://d
Our key integrations with leading AI platforms extend the functionality of Ultralytics' offerings, enhancing tasks such as dataset labeling, training, visualization, and model management. Discover how Ultralytics, in collaboration with [W&B](https://docs.wandb.ai/guides/integrations/ultralytics/), [Comet](https://bit.ly/yolov8-readme-comet), [Roboflow](https://roboflow.com/?ref=ultralytics) and [OpenVINO](https://docs.ultralytics.com/integrations/openvino/), can optimize your AI workflow.
<br>
<a href="https://www.ultralytics.com/hub" target="_blank">
<img width="100%" src="https://github.com/ultralytics/assets/raw/main/yolov8/banner-integrations.png" alt="Ultralytics active learning integrations"></a>
<img width="100%" src="https://github.com/ultralytics/assets/raw/main/yolov8/banner-integrations.png" alt="Ultralytics active learning integrations">
</a>
<br>
<br>

@ -238,32 +238,42 @@ def remove_macros():
print(f"Removed {len(macros_indices)} URLs containing '/macros/' from {sitemap}")
def minify_html_files():
"""Minifies all HTML files in the site directory and prints reduction stats."""
def minify_files(html=True, css=True, js=True):
"""Minifies HTML, CSS, and JS files and prints total reduction stats."""
minify, compress, jsmin = None, None, None
try:
from minify_html import minify # pip install minify-html
except ImportError:
if html:
from minify_html import minify
if css:
from csscompressor import compress
if js:
import jsmin
except ImportError as e:
print(f"Missing required package: {str(e)}")
return
total_original_size = 0
total_minified_size = 0
for html_file in tqdm(SITE.rglob("*.html"), desc="Minifying HTML files"):
with open(html_file, encoding="utf-8") as f:
content = f.read()
original_size = len(content)
minified_content = minify(content, keep_closing_tags=True, minify_css=True, minify_js=True)
minified_size = len(minified_content)
total_original_size += original_size
total_minified_size += minified_size
with open(html_file, "w", encoding="utf-8") as f:
f.write(minified_content)
total_reduction = total_original_size - total_minified_size
total_percent_reduction = (total_reduction / total_original_size) * 100
print(f"Minify HTML reduction: {total_percent_reduction:.2f}% " f"({total_reduction / 1024:.2f} KB saved)")
stats = {}
for ext, minifier in {
"html": (lambda x: minify(x, keep_closing_tags=True, minify_css=True, minify_js=True)) if html else None,
"css": compress if css else None,
"js": jsmin.jsmin if js else None,
}.items():
if not minifier:
continue
stats[ext] = {"original": 0, "minified": 0}
directory = "" # "stylesheets" if ext == css else "javascript" if ext == "js" else ""
for f in tqdm((SITE / directory).rglob(f"*.{ext}"), desc=f"Minifying {ext.upper()}"):
content = f.read_text(encoding="utf-8")
minified = minifier(content)
stats[ext]["original"] += len(content)
stats[ext]["minified"] += len(minified)
f.write_text(minified, encoding="utf-8")
for ext, data in stats.items():
if data["original"]:
r = data["original"] - data["minified"] # reduction
print(f"Total {ext.upper()} reduction: {(r / data['original']) * 100:.2f}% ({r / 1024:.2f} KB saved)")
def main():
@ -279,8 +289,8 @@ def main():
# Update docs HTML pages
update_docs_html()
# Minify HTML files
minify_html_files()
# Minify files
minify_files(html=False, css=False, js=False)
# Show command to serve built website
print('Docs built correctly ✅\nServe site at http://localhost:8000 with "python -m http.server --directory site"')
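For reference, a minimal sketch of the three minifiers the new `minify_files` dispatches to, applied here to in-memory strings (assumes the `minify-html`, `csscompressor`, and `jsmin` packages are installed):

```python
import jsmin
from csscompressor import compress
from minify_html import minify

html = "<html>  <body>  <p>Hello world</p>  </body>  </html>"
css = "body {  color:  red;  margin:  0px; }"
js = "function add(a, b) {  return a + b;  }"

print(minify(html, keep_closing_tags=True, minify_css=True, minify_js=True))  # minified HTML
print(compress(css))  # minified CSS
print(jsmin.jsmin(js))  # minified JS
```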

@ -12,13 +12,13 @@ Whether you're working on automotive research, developing AI solutions for vehic
<p align="center">
<br>
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/eHuzCNZeu0g"
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/HATMPgLYAPU"
title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> Carparts <a href="https://www.ultralytics.com/glossary/instance-segmentation">Instance Segmentation</a> Using Ultralytics HUB
<strong>Watch:</strong> Carparts <a href="https://www.ultralytics.com/glossary/instance-segmentation">Instance Segmentation</a> with Ultralytics YOLO11
</p>
## Dataset Structure

@ -10,6 +10,17 @@ keywords: Model Evaluation, Machine Learning Model Evaluation, Fine Tuning Machi
Once you've [trained](./model-training-tips.md) your computer vision model, evaluating and refining it to perform optimally is essential. Just training your model isn't enough. You need to make sure that your model is accurate, efficient, and fulfills the [objective](./defining-project-goals.md) of your computer vision project. By evaluating and fine-tuning your model, you can identify weaknesses, improve its accuracy, and boost overall performance.
<p align="center">
<br>
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/-aYO-6VaDrw"
title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> Insights into Model Evaluation and Fine-Tuning | Tips for Improving Mean Average Precision
</p>
In this guide, we'll share insights on model evaluation and fine-tuning that'll make this [step of a computer vision project](./steps-of-a-cv-project.md) more approachable. We'll discuss how to understand evaluation metrics and implement fine-tuning techniques, giving you the knowledge to elevate your model's capabilities.
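For orientation, a minimal validation sketch that produces the mean average precision metrics this guide discusses (the `coco8.yaml` dataset name is illustrative):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
metrics = model.val(data="coco8.yaml")  # evaluate on the validation split
print(metrics.box.map, metrics.box.map50)  # mAP50-95 and mAP50
```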
## Evaluating Model Performance Using Metrics

@ -4,7 +4,7 @@ description: Learn how to use Ultralytics YOLOv8 for precise object counting in
keywords: object counting, regions, YOLOv8, computer vision, Ultralytics, efficiency, accuracy, automation, real-time, applications, surveillance, monitoring
---
# Object Counting in Different Regions using Ultralytics YOLOv8 🚀
# Object Counting in Different Regions using Ultralytics YOLO 🚀
## What is Object Counting in Regions?
@ -12,13 +12,13 @@ keywords: object counting, regions, YOLOv8, computer vision, Ultralytics, effici
<p align="center">
<br>
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/okItf1iHlV8"
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/mzLfC13ISF4"
title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> Ultralytics YOLOv8 Object Counting in Multiple & Movable Regions
<strong>Watch:</strong> Object Counting in Different Regions using Ultralytics YOLO11 | Ultralytics Solutions 🚀
</p>
## Advantages of Object Counting in Regions?
@ -39,44 +39,45 @@ keywords: object counting, regions, YOLOv8, computer vision, Ultralytics, effici
=== "Python"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Define region points
# region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)] # Pass region as list
# pass region as dictionary
region_points = {
"region-01": [(50, 50), (250, 50), (250, 250), (50, 250)],
"region-02": [(640, 640), (780, 640), (780, 720), (640, 720)]
}
# Video writer
video_writer = cv2.VideoWriter("region_counting.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Init Object Counter
region = solutions.RegionCounter(
show=True,
region=region_points,
model="yolo11n.pt",
)
# Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
im0 = region.count(im0)
video_writer.write(im0)
cap.release()
video_writer.release()
cv2.destroyAllWindows()
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Define region points
# region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)] # Pass region as list
# pass region as dictionary
region_points = {
"region-01": [(50, 50), (250, 50), (250, 250), (50, 250)],
"region-02": [(640, 640), (780, 640), (780, 720), (640, 720)],
}
# Video writer
video_writer = cv2.VideoWriter("region_counting.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Init Object Counter
region = solutions.RegionCounter(
show=True,
region=region_points,
model="yolo11n.pt",
)
# Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
im0 = region.count(im0)
video_writer.write(im0)
cap.release()
video_writer.release()
cv2.destroyAllWindows()
```
!!! tip "Ultralytics Example Code"

@ -11,18 +11,6 @@ Welcome! We're thrilled that you're considering contributing to our [Ultralytics
<a href="https://github.com/ultralytics/ultralytics/graphs/contributors">
<img width="100%" src="https://github.com/ultralytics/docs/releases/download/0/ultralytics-open-source-contributors.avif" alt="Ultralytics open-source contributors"></a>
## Table of Contents
1. [Code of Conduct](#code-of-conduct)
2. [Contributing via Pull Requests](#contributing-via-pull-requests)
- [CLA Signing](#cla-signing)
- [Google-Style Docstrings](#google-style-docstrings)
- [GitHub Actions CI Tests](#github-actions-ci-tests)
3. [Reporting Bugs](#reporting-bugs)
4. [License](#license)
5. [Conclusion](#conclusion)
6. [FAQ](#faq)
## Code of Conduct
To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code_of_conduct/). Respect, kindness, and professionalism are at the heart of our community.
@ -131,6 +119,118 @@ Ultralytics uses the [GNU Affero General Public License v3.0 (AGPL-3.0)](https:/
We encourage all contributors to familiarize themselves with the terms of the AGPL-3.0 license to contribute effectively and ethically to the Ultralytics open-source community.
## Open-Sourcing Your Projects with YOLO and AGPL-3.0 Compliance
If you're planning to develop and release your own project using YOLO models, the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://www.gnu.org/licenses/agpl-3.0.html) ensures that all derivative works remain open and accessible. This section provides guidance, including steps, best practices, and requirements, to help you open-source your project while complying with AGPL-3.0.
### Options for Starting Your Project
You can kick-start your project using one of these approaches:
1. **Fork the Ultralytics YOLO Repository**
Fork the official Ultralytics YOLO repository directly from [https://github.com/ultralytics/ultralytics](https://github.com/ultralytics/ultralytics).
- Use this option if you plan to build directly on the latest YOLO implementation.
- Modify the forked code as needed while ensuring compliance with AGPL-3.0.
2. **Start from the Ultralytics Template Repository**
Use the Ultralytics template repository available at [https://github.com/ultralytics/template](https://github.com/ultralytics/template).
- Ideal for starting a clean, modular project with pre-configured best practices.
- This option provides a lightweight starting point for projects that integrate or extend YOLO models.
### What You Need to Open-Source
To comply with AGPL-3.0, you must make the following components of your project openly available:
1. **Your Entire Project Source Code**:
- Include all code for the larger project containing your YOLO models, scripts, and utilities.
2. **Model Weights** (if modified):
- Share any fine-tuned or modified model weights as part of the open-source project.
3. **Configuration Files**:
- Provide configuration files such as `.yaml` or `.json` that define the training setup, hyperparameters, or deployment configurations.
4. **Training Data (if redistributable)**:
- If you include preprocessed or generated data that is redistributable, ensure it is part of the repository or clearly linked.
5. **Web Application Components**:
- Include all backend and frontend source code if your project is a web application, especially server-side components.
6. **Documentation**:
- Include clear documentation on how to use, build, and extend your project.
7. **Build and Deployment Scripts**:
- Share scripts for setting up the environment, building the application, and deploying it, such as `Dockerfiles`, `requirements.txt`, or `Makefiles`.
8. **Testing Framework**:
- Open-source your test cases, such as unit and integration tests, to ensure reproducibility and reliability.
9. **Third-Party Modifications**:
- Provide source code for any third-party libraries you've modified.
### Steps to Open-Source Your Project
1. **Choose Your Starting Point**:
- Fork the Ultralytics YOLO repository or start from the Ultralytics template repository.
2. **Set Your License**:
- Add a `LICENSE` file containing the AGPL-3.0 text.
3. **Credit Upstream Contributions**:
- Include attribution to Ultralytics YOLO in your README. For example:
```
This project builds on [Ultralytics YOLO](https://github.com/ultralytics/ultralytics), licensed under AGPL-3.0.
```
4. **Make Your Code Public**:
- Push your entire project (including the components listed above) to a public GitHub repository.
5. **Document Your Project**:
- Write a clear `README.md` with instructions for setup, usage, and contributions.
6. **Enable Contributions**:
- Set up an issue tracker and contribution guidelines to foster collaboration.
By following these steps and ensuring you include all necessary components, you'll comply with AGPL-3.0 and contribute meaningfully to the open-source community. Let's continue fostering collaboration and innovation in computer vision together! 🚀
### Example Repository Structure
Below is an example structure for an AGPL-3.0 project. See [https://github.com/ultralytics/template](https://github.com/ultralytics/template) for details.
```
my-yolo-project/
├── LICENSE # AGPL-3.0 license text
├── README.md # Project overview and license information
├── src/ # Source code for the project
│ ├── model.py # YOLO-based model implementation
│ ├── utils.py # Utility scripts
│ └── ...
├── pyproject.toml # Python dependencies
├── tests/ # Unit and integration tests
├── .github/ # GitHub Actions for CI
│ └── workflows/
│ └── ci.yml # Continuous integration configuration
└── docs/ # Project documentation
└── index.md
```
By following this guide, you can ensure your project remains compliant with AGPL-3.0 while contributing to the open-source community. Your adherence strengthens the ethos of collaboration, transparency, and accessibility that drives the success of projects like YOLO.
## Conclusion
Thank you for your interest in contributing to [Ultralytics](https://www.ultralytics.com/) [open-source](https://github.com/ultralytics) YOLO projects. Your participation is essential in shaping the future of our software and building a vibrant community of innovation and collaboration. Whether you're enhancing code, reporting bugs, or suggesting new features, your contributions are invaluable.

@ -52,7 +52,7 @@ Now that we've covered what Albumentations is and what it can do, let's look at
### Installation
To use Albumentations with YOLOv11, start by making sure you have the necessary packages installed. If Albumentations isn't installed, the augmentations won't be applied during training. Once set up, you'll be ready to create an augmented dataset for training, with Albumentations integrated to enhance your model automatically.
To use Albumentations with YOLO11, start by making sure you have the necessary packages installed. If Albumentations isn't installed, the augmentations won't be applied during training. Once set up, you'll be ready to create an augmented dataset for training, with Albumentations integrated to enhance your model automatically.
!!! tip "Installation"
@ -67,7 +67,7 @@ For detailed instructions and best practices related to the installation process
### Usage
After installing the necessary packages, you're ready to start using Albumentations with YOLO11. When you train YOLOv11, a set of augmentations is automatically applied through its integration with Albumentations, making it easy to enhance your model's performance.
After installing the necessary packages, you're ready to start using Albumentations with YOLO11. When you train YOLO11, a set of augmentations is automatically applied through its integration with Albumentations, making it easy to enhance your model's performance.
!!! example "Usage"

@ -162,7 +162,7 @@ cd examples/imx500
Step 3: Run YOLOv8 object detection, using the labels.txt file that has been generated during the IMX500 export.
```bash
python imx500_object_detection_demo.py --model <path to network.rpk> --fps 25 --bbox-normalization --ignore-dash-labels --bbox-order xy labels <path to labels.txt>
python imx500_object_detection_demo.py --model <path to network.rpk> --fps 25 --bbox-normalization --ignore-dash-labels --bbox-order xy --labels <path to labels.txt>
```
Then you will be able to see live inference output as follows

@ -20,6 +20,7 @@
| `seed` | `0` | Sets the random seed for training, ensuring reproducibility of results across runs with the same configurations. |
| `deterministic` | `True` | Forces deterministic algorithm use, ensuring reproducibility but may affect performance and speed due to the restriction on non-deterministic algorithms. |
| `single_cls` | `False` | Treats all classes in multi-class datasets as a single class during training. Useful for binary classification tasks or when focusing on object presence rather than classification. |
| `classes` | `None` | Specifies a list of class IDs to train on. Useful for filtering out and focusing only on certain classes during training. |
| `rect` | `False` | Enables rectangular training, optimizing batch composition for minimal padding. Can improve efficiency and speed but may affect model accuracy. |
| `cos_lr` | `False` | Utilizes a cosine [learning rate](https://www.ultralytics.com/glossary/learning-rate) scheduler, adjusting the learning rate following a cosine curve over epochs. Helps in managing learning rate for better convergence. |
| `close_mosaic` | `10` | Disables mosaic [data augmentation](https://www.ultralytics.com/glossary/data-augmentation) in the last N epochs to stabilize training before completion. Setting to 0 disables this feature. |
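A minimal sketch of the new `classes` argument in use, assuming a COCO-style dataset where IDs 0 and 2 are the classes of interest (`coco8.yaml` is illustrative):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
# Train only on class IDs 0 and 2; labels for other classes are filtered out
model.train(data="coco8.yaml", epochs=3, imgsz=640, classes=[0, 2])
```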

@ -271,6 +271,17 @@ Auto-annotation is a powerful feature of SAM 2, enabling users to generate segme
### How to Auto-Annotate with SAM 2
<p align="center">
<br>
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/M7xWw4Iodhg"
title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> Auto Annotation with Meta's Segment Anything 2 Model using Ultralytics | Data Labeling
</p>
To auto-annotate your dataset using SAM 2, follow this example:
!!! example "Auto-Annotation Example"
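A minimal auto-annotation sketch, assuming the `auto_annotate` utility and the `yolo11x.pt` / `sam2_b.pt` checkpoint names used elsewhere in these docs:

```python
from ultralytics.data.annotator import auto_annotate

# Detections from the YOLO model prompt SAM 2, which writes segmentation labels for each image
auto_annotate(data="path/to/images", det_model="yolo11x.pt", sam_model="sam2_b.pt")
```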

@ -47,13 +47,13 @@ Once your model is trained and validated, the next logical step is to evaluate i
<p align="center">
<br>
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/j8uQc0qB91s?start=105"
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/rEQlAaevEFc"
title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> Ultralytics Modes Tutorial: Benchmark
<strong>Watch:</strong> Benchmark Ultralytics YOLO11 Models | How to Compare Model Performance on Different Hardware?
</p>
## Why Is Benchmarking Crucial?

@ -6,7 +6,6 @@ keywords: Ultralytics YOLO11, detection, segmentation, classification, oriented
# Ultralytics YOLO11 Tasks
<br>
<img width="1024" src="https://github.com/ultralytics/docs/releases/download/0/ultralytics-yolov8-tasks-banner.avif" alt="Ultralytics YOLO supported tasks">
YOLO11 is an AI framework that supports multiple [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) **tasks**. The framework can be used to perform [detection](detect.md), [segmentation](segment.md), [obb](obb.md), [classification](classify.md), and [pose](pose.md) estimation. Each of these tasks has a different objective and use case.

@ -13,28 +13,16 @@ Pose estimation is a task that involves identifying the location of specific poi
The output of a pose estimation model is a set of points that represent the keypoints on an object in the image, usually along with the confidence scores for each point. Pose estimation is a good choice when you need to identify specific parts of an object in a scene, and their location in relation to each other.
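A minimal sketch of reading that keypoint output from a prediction (checkpoint and image URL are the usual doc examples):

```python
from ultralytics import YOLO

model = YOLO("yolo11n-pose.pt")
results = model("https://ultralytics.com/images/bus.jpg")
keypoints = results[0].keypoints  # Keypoints object for the first image
print(keypoints.xy.shape)  # (num_persons, num_keypoints, 2) pixel coordinates
print(keypoints.conf)  # per-keypoint confidence scores
```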
<table>
<tr>
<td align="center">
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/Y28xXQmju64?si=pCY4ZwejZFu6Z4kZ"
title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> Pose Estimation with Ultralytics YOLO.
</td>
<td align="center">
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/aeAX6vWpfR0"
title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> Pose Estimation with Ultralytics HUB.
</td>
</tr>
</table>
<p align="center">
<br>
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/AAkfToU3nAc"
title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> Ultralytics YOLO11 Pose Estimation Tutorial | Real-Time Object Tracking and Human Pose Detection
</p>
!!! tip

@ -374,6 +374,91 @@ See docstring for each function or visit the `ultralytics.utils.ops` [reference
Ultralytics includes an Annotator class that can be used to annotate any kind of data. It's easiest to use with [object detection bounding boxes](../modes/predict.md#boxes), [pose key points](../modes/predict.md#keypoints), and [oriented bounding boxes](../modes/predict.md#obb).
#### Ultralytics Sweep Annotation
!!! example "Python Examples using YOLO11 🚀"
=== "Python"
```python
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
# User defined video path and model file
cap = cv2.VideoCapture("Path/to/video/file.mp4")
model = YOLO(model="yolo11s-seg.pt") # Model file i.e. yolo11s.pt or yolo11m-seg.pt
if not cap.isOpened():
print("Error: Could not open video.")
exit()
# Initialize the video writer object.
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("ultralytics.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
masks = None # Initialize variable to store masks data
f = 0 # Initialize frame count variable for enabling mouse event.
line_x = w # Store width of line.
dragging = False # Initialize bool variable for line dragging.
classes = model.names # Store model classes names for plotting.
window_name = "Ultralytics Sweep Annotator"
def drag_line(event, x, y, flags, param): # Mouse callback for dragging line.
global line_x, dragging
if event == cv2.EVENT_LBUTTONDOWN or (flags & cv2.EVENT_FLAG_LBUTTON):
line_x = max(0, min(x, w))
dragging = True
while cap.isOpened(): # Loop over the video capture object.
ret, im0 = cap.read()
if not ret:
break
f = f + 1 # Increment frame count.
count = 0 # Re-initialize count variable on every frame for precise counts.
annotator = Annotator(im0)
results = model.track(im0, persist=True) # Track objects using track method.
if f == 1:
cv2.namedWindow(window_name)
cv2.setMouseCallback(window_name, drag_line)
if results[0].boxes.id is not None:
if results[0].masks is not None:
masks = results[0].masks.xy
track_ids = results[0].boxes.id.int().cpu().tolist()
clss = results[0].boxes.cls.cpu().tolist()
boxes = results[0].boxes.xyxy.cpu()
for mask, box, cls, t_id in zip(masks or [None] * len(boxes), boxes, clss, track_ids):
color = colors(t_id, True) # Assign different color to each tracked object.
if mask is not None and mask.size > 0:
# If you want to overlay the masks
# mask[:, 0] = np.clip(mask[:, 0], line_x, w)
# mask_img = cv2.fillPoly(im0.copy(), [mask.astype(int)], color)
# cv2.addWeighted(mask_img, 0.5, im0, 0.5, 0, im0)
if box[0] > line_x:
count += 1
annotator.seg_bbox(mask=mask, mask_color=color, label=str(classes[cls]))
else:
if box[0] > line_x:
count += 1
annotator.box_label(box=box, color=color, label=str(classes[cls]))
annotator.sweep_annotator(line_x=line_x, line_y=h, label=f"COUNT:{count}") # Display the sweep
cv2.imshow(window_name, im0)
video_writer.write(im0)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
cap.release() # Release the video capture.
video_writer.release() # Release the video writer.
cv2.destroyAllWindows() # Destroy all opened windows.
```
#### Horizontal Bounding Boxes
```{ .py .annotate }

docs/overrides/assets/favicon.ico: binary file not shown (9.4 KiB before).

@ -62,7 +62,7 @@ cargo run -r -- --task detect --ver v8 --nc 6 --model xxx.onnx # YOLOv8
# Classify
cargo run -r -- --task classify --ver v5 --scale s --width 224 --height 224 --nc 1000 # YOLOv5
cargo run -r -- --task classify --ver v8 --scale n --width 224 --height 224 --nc 1000 # YOLOv8
cargo run -r -- --task classify --ver v11 --scale n --width 224 --height 224 --nc 1000 # YOLOv11
cargo run -r -- --task classify --ver v11 --scale n --width 224 --height 224 --nc 1000 # YOLO11
# Detect
cargo run -r -- --task detect --ver v5 --scale n # YOLOv5
@ -71,12 +71,12 @@ cargo run -r -- --task detect --ver v7 --scale t # YOLOv7
cargo run -r -- --task detect --ver v8 --scale n # YOLOv8
cargo run -r -- --task detect --ver v9 --scale t # YOLOv9
cargo run -r -- --task detect --ver v10 --scale n # YOLOv10
cargo run -r -- --task detect --ver v11 --scale n # YOLOv11
cargo run -r -- --task detect --ver v11 --scale n # YOLO11
cargo run -r -- --task detect --ver rtdetr --scale l # RTDETR
# Pose
cargo run -r -- --task pose --ver v8 --scale n # YOLOv8-Pose
cargo run -r -- --task pose --ver v11 --scale n # YOLOv11-Pose
cargo run -r -- --task pose --ver v11 --scale n # YOLO11-Pose
# Segment
cargo run -r -- --task segment --ver v5 --scale n # YOLOv5-Segment
@ -86,7 +86,7 @@ cargo run -r -- --task segment --ver v8 --model yolo/FastSAM-s-dyn-f16.onnx # F
# OBB
cargo run -r -- --ver v8 --task obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLOv8-Obb
cargo run -r -- --ver v11 --task obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLOv11-Obb
cargo run -r -- --ver v11 --task obb --scale n --width 1024 --height 1024 --source images/dota.png # YOLO11-Obb
```
**`cargo run -- --help` for more options**

@ -226,6 +226,7 @@ int main() {
cv::Mat image = cv::imread("/path/to/bus.jpg");
cv::Mat input_image;
letterbox(image, input_image, {640, 640});
cv::cvtColor(input_image, input_image, cv::COLOR_BGR2RGB);
torch::Tensor image_tensor = torch::from_blob(input_image.data, {input_image.rows, input_image.cols, 3}, torch::kByte).to(device);
image_tensor = image_tensor.toType(torch::kFloat32).div(255);

@ -22,7 +22,7 @@ theme:
language: en
custom_dir: docs/overrides/
logo: https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Reverse.svg
favicon: assets/favicon.ico
favicon: https://raw.githubusercontent.com/ultralytics/assets/refs/heads/main/logo/favicon-yolo.png
icon:
repo: fontawesome/brands/github
# font: # disabled for faster page load times

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
__version__ = "8.3.38"
__version__ = "8.3.39"
import os

@ -11,7 +11,6 @@ import cv2
from ultralytics.utils import (
ASSETS,
ASSETS_URL,
DEFAULT_CFG,
DEFAULT_CFG_DICT,
DEFAULT_CFG_PATH,

@ -144,6 +144,9 @@ class Model(nn.Module):
else:
self._load(model, task=task)
# Delete super().training for accessing self.model.training
del self.training
def __call__(
self,
source: Union[str, Path, int, Image.Image, list, tuple, np.ndarray, torch.Tensor] = None,
@ -1143,3 +1146,29 @@ class Model(nn.Module):
"""
self.model.eval()
return self
def __getattr__(self, name):
"""
Enables accessing model attributes directly through the Model class.
This method provides a way to access attributes of the underlying model directly through the Model class
instance. It first checks if the requested attribute is 'model', in which case it returns the model from
the module dictionary. Otherwise, it delegates the attribute lookup to the underlying model.
Args:
name (str): The name of the attribute to retrieve.
Returns:
(Any): The requested attribute value.
Raises:
AttributeError: If the requested attribute does not exist in the model.
Examples:
>>> model = YOLO("yolo11n.pt")
>>> print(model.stride)
>>> print(model.task)
"""
if name == "model":
return self._modules["model"]
return getattr(self.model, name)
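A small sketch of the delegation this `__getattr__` enables, assuming a loaded detection model (the attribute names follow the docstring above):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
# Attributes not defined on the wrapper fall through to the underlying nn.Module
print(model.task)    # "detect"
print(model.stride)  # stride tensor from the wrapped model
```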

@ -54,6 +54,6 @@ class ClassificationPredictor(BasePredictor):
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
return [
Results(orig_img, path=img_path, names=self.model.names, probs=pred)
Results(orig_img, path=img_path, names=self.model.names, probs=pred.softmax(0))
for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0])
]

@ -146,5 +146,5 @@ class DetectionTrainer(BaseTrainer):
"""Get batch size by calculating memory occupation of model."""
train_dataset = self.build_dataset(self.trainset, mode="train", batch=16)
# 4 for mosaic augmentation
max_num_obj = max(len(l["cls"]) for l in train_dataset.labels) * 4
max_num_obj = max(len(label["cls"]) for label in train_dataset.labels) * 4
return super().auto_batch(max_num_obj)

@ -296,7 +296,7 @@ class Classify(nn.Module):
if isinstance(x, list):
x = torch.cat(x, 1)
x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
return x if self.training else x.softmax(1)
return x
class WorldDetect(Detect):
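Read together with the `ClassificationPredictor` change above, the head now returns raw logits in every mode and softmax moves to postprocessing; a minimal sketch of the equivalence on a logits tensor:

```python
import torch

logits = torch.tensor([2.0, 0.5, -1.0])  # raw class scores, as Classify.forward now returns
probs = logits.softmax(0)                # what ClassificationPredictor applies in postprocess
print(probs, probs.sum())                # probabilities that sum to 1
```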

@ -71,7 +71,7 @@ class AIGym(BaseSolution):
>>> processed_image = gym.monitor(image)
"""
# Extract tracks
tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"])[0]
tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"], **self.track_add_args)[0]
if tracks.boxes.id is not None:
# Extract and check keypoints

@ -74,6 +74,10 @@ class BaseSolution:
self.model = YOLO(self.CFG["model"])
self.names = self.model.names
self.track_add_args = { # Tracker additional arguments for advance configuration
k: self.CFG[k] for k in ["verbose", "iou", "conf", "device", "max_det", "half", "tracker"]
}
if IS_CLI and self.CFG["source"] is None:
d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
LOGGER.warning(f" WARNING: source not provided. using default source {ASSETS_URL}/{d_s}")
@ -98,7 +102,7 @@ class BaseSolution:
>>> frame = cv2.imread("path/to/image.jpg")
>>> solution.extract_tracks(frame)
"""
self.tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"])
self.tracks = self.model.track(source=im0, persist=True, classes=self.CFG["classes"], **self.track_add_args)
# Extract tracks for OBB or object detection
self.track_data = self.tracks[0].obb or self.tracks[0].boxes
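A minimal sketch of the kwargs-forwarding pattern introduced here, using illustrative config values (the real dictionary is the solution's `CFG`):

```python
# Illustrative stand-in for the solution CFG dictionary
CFG = {"verbose": True, "iou": 0.7, "conf": 0.25, "device": None, "max_det": 300, "half": False, "tracker": "botsort.yaml"}

track_add_args = {k: CFG[k] for k in ["verbose", "iou", "conf", "device", "max_det", "half", "tracker"]}
# model.track(source=frame, persist=True, classes=CFG.get("classes"), **track_add_args)
```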

@ -75,9 +75,8 @@ def segment2box(segment, width=640, height=640):
(np.ndarray): the minimum and maximum x and y values of the segment.
"""
x, y = segment.T # segment xy
inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
x = x[inside]
y = y[inside]
x = x.clip(0, width)
y = y.clip(0, height)
return (
np.array([x.min(), y.min(), x.max(), y.max()], dtype=segment.dtype)
if any(x)
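A small numpy sketch of the new clipping behavior, showing that out-of-bounds points are clamped to the image rather than dropped (coordinates are illustrative):

```python
import numpy as np

segment = np.array([[-5.0, 10.0], [100.0, 700.0], [650.0, 300.0]])  # points partly outside a 640x640 image
x, y = segment.T
x, y = x.clip(0, 640), y.clip(0, 640)  # every point is kept, unlike the old inside-the-image filter
print(x.min(), y.min(), x.max(), y.max())  # 0.0 10.0 640.0 640.0
```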

@ -238,7 +238,16 @@ class Annotator:
}
def get_txt_color(self, color=(128, 128, 128), txt_color=(255, 255, 255)):
"""Assign text color based on background color."""
"""
Assign text color based on background color.
Args:
color (tuple, optional): The background color of the rectangle for text (B, G, R).
txt_color (tuple, optional): The color of the text (R, G, B).
Returns:
txt_color (tuple): Text color for label
"""
if color in self.dark_colors:
return 104, 31, 17
elif color in self.light_colors:
@ -544,7 +553,9 @@ class Annotator:
bbox (tuple): Bounding box coordinates in the format (x_min, y_min, x_max, y_max).
Returns:
angle (degree): Degree value of angle between three points
width (float): Width of the bounding box.
height (float): Height of the bounding box.
area (float): Area enclosed by the bounding box.
"""
x_min, y_min, x_max, y_max = bbox
width = x_max - x_min
@ -791,19 +802,52 @@ class Annotator:
cv2.polylines(self.im, [np.int32([mask])], isClosed=True, color=mask_color, thickness=2)
text_size, _ = cv2.getTextSize(label, 0, self.sf, self.tf)
cv2.rectangle(
self.im,
(int(mask[0][0]) - text_size[0] // 2 - 10, int(mask[0][1]) - text_size[1] - 10),
(int(mask[0][0]) + text_size[0] // 2 + 10, int(mask[0][1] + 10)),
mask_color,
-1,
)
if label:
cv2.rectangle(
self.im,
(int(mask[0][0]) - text_size[0] // 2 - 10, int(mask[0][1]) - text_size[1] - 10),
(int(mask[0][0]) + text_size[0] // 2 + 10, int(mask[0][1] + 10)),
mask_color,
-1,
)
cv2.putText(
self.im, label, (int(mask[0][0]) - text_size[0] // 2, int(mask[0][1])), 0, self.sf, txt_color, self.tf
)
def sweep_annotator(self, line_x=0, line_y=0, label=None, color=(221, 0, 186), txt_color=(255, 255, 255)):
"""
Function for drawing a sweep annotation line and an optional label.
Args:
line_x (int): The x-coordinate of the sweep line.
line_y (int): The y-coordinate limit of the sweep line.
label (str, optional): Text label to be drawn in center of sweep line. If None, no label is drawn.
color (tuple): RGB color for the line and label background.
txt_color (tuple): RGB color for the label text.
"""
# Draw the sweep line
cv2.line(self.im, (line_x, 0), (line_x, line_y), color, self.tf * 2)
# Draw label, if provided
if label:
(text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, self.sf, self.tf)
cv2.rectangle(
self.im,
(line_x - text_width // 2 - 10, line_y // 2 - text_height // 2 - 10),
(line_x + text_width // 2 + 10, line_y // 2 + text_height // 2 + 10),
color,
-1,
)
cv2.putText(
self.im,
label,
(line_x - text_width // 2, line_y // 2 + text_height // 2),
cv2.FONT_HERSHEY_SIMPLEX,
self.sf,
txt_color,
self.tf,
)
def plot_distance_and_line(
self, pixels_distance, centroids, line_color=(104, 31, 17), centroid_color=(255, 0, 255)
):
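A minimal usage sketch for the new `sweep_annotator` helper, drawing on a blank frame with illustrative values:

```python
import cv2
import numpy as np

from ultralytics.utils.plotting import Annotator

frame = np.zeros((720, 1280, 3), dtype=np.uint8)  # stand-in video frame
annotator = Annotator(frame)
annotator.sweep_annotator(line_x=640, line_y=720, label="COUNT:3")  # vertical sweep line with a centered label
cv2.imwrite("sweep_demo.png", annotator.im)
```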

@ -301,28 +301,22 @@ def fuse_deconv_and_bn(deconv, bn):
def model_info(model, detailed=False, verbose=True, imgsz=640):
"""
Model information.
imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320].
"""
"""Print and return detailed model information layer by layer."""
if not verbose:
return
n_p = get_num_params(model) # number of parameters
n_g = get_num_gradients(model) # number of gradients
n_l = len(list(model.modules())) # number of layers
if detailed:
LOGGER.info(
f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}"
)
LOGGER.info(f"{'layer':>5}{'name':>40}{'gradient':>10}{'parameters':>12}{'shape':>20}{'mu':>10}{'sigma':>10}")
for i, (name, p) in enumerate(model.named_parameters()):
name = name.replace("module_list.", "")
LOGGER.info(
"%5g %40s %9s %12g %20s %10.3g %10.3g %10s"
% (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std(), p.dtype)
f"{i:>5g}{name:>40s}{p.requires_grad!r:>10}{p.numel():>12g}{str(list(p.shape)):>20s}"
f"{p.mean():>10.3g}{p.std():>10.3g}{str(p.dtype):>15s}"
)
flops = get_flops(model, imgsz)
flops = get_flops(model, imgsz) # imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320]
fused = " (fused)" if getattr(model, "is_fused", lambda: False)() else ""
fs = f", {flops:.1f} GFLOPs" if flops else ""
yaml_file = getattr(model, "yaml_file", "") or getattr(model, "yaml", {}).get("yaml_file", "")
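A minimal sketch of calling `model_info` with the new detailed table formatting (the checkpoint name is the usual doc example):

```python
from ultralytics import YOLO
from ultralytics.utils.torch_utils import model_info

yolo = YOLO("yolo11n.pt")
model_info(yolo.model, detailed=True, imgsz=640)  # logs layer index, name, gradient flag, parameters, shape, mu, sigma, dtype
```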
