Merge branch 'main' into benchmark-format-args

Branch: benchmark-format-args
Ultralytics Assistant committed via GitHub · 6 months ago · commit 05ab171299
50 changed files (lines changed shown in parentheses):

1. .github/workflows/ci.yaml (10)
2. .pre-commit-config.yaml (86)
3. CONTRIBUTING.md (77)
4. README.md (2)
5. README.zh-CN.md (2)
6. docs/build_docs.py (37)
7. docs/en/datasets/detect/lvis.md (2)
8. docs/en/help/contributing.md (71)
9. docs/en/hub/integrations.md (16)
10. docs/en/integrations/index.md (16)
11. docs/en/integrations/weights-biases.md (11)
12. docs/en/macros/export-args.md (14)
13. docs/en/macros/export-table.md (15)
14. docs/en/models/yolov7.md (2)
15. docs/en/models/yolov9.md (2)
16. docs/en/modes/benchmark.md (16)
17. docs/en/modes/export.md (31)
18. docs/en/modes/predict.md (1)
19. docs/en/tasks/classify.md (17)
20. docs/en/tasks/detect.md (16)
21. docs/en/tasks/obb.md (17)
22. docs/en/tasks/pose.md (17)
23. docs/en/tasks/segment.md (17)
24. docs/en/usage/cfg.md (14)
25. docs/en/usage/cli.md (16)
26. docs/en/usage/simple-utilities.md (2)
27. docs/mkdocs_github_authors.yaml (2)
28. docs/overrides/stylesheets/style.css (97)
29. examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py (175)
30. mkdocs.yml (2)
31. pyproject.toml (7)
32. tests/test_python.py (2)
33. ultralytics/__init__.py (2)
34. ultralytics/cfg/__init__.py (2)
35. ultralytics/data/converter.py (6)
36. ultralytics/data/split_dota.py (27)
37. ultralytics/engine/results.py (24)
38. ultralytics/engine/trainer.py (4)
39. ultralytics/models/yolo/detect/train.py (2)
40. ultralytics/nn/autobackend.py (8)
41. ultralytics/trackers/basetrack.py (43)
42. ultralytics/trackers/bot_sort.py (82)
43. ultralytics/trackers/byte_tracker.py (117)
44. ultralytics/trackers/track.py (19)
45. ultralytics/trackers/utils/gmc.py (90)
46. ultralytics/trackers/utils/kalman_filter.py (193)
47. ultralytics/trackers/utils/matching.py (52)
48. ultralytics/utils/__init__.py (2)
49. ultralytics/utils/files.py (103)
50. ultralytics/utils/plotting.py (14)

@@ -56,7 +56,7 @@ jobs:
python-version: "3.x"
cache: "pip" # caching pip dependencies
- name: Install Dependencies
run: pip install tqdm mkdocs-material "mkdocstrings[python]" mkdocs-jupyter mkdocs-redirects mkdocs-ultralytics-plugin
run: pip install tqdm mkdocs-material "mkdocstrings[python]" mkdocs-jupyter mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin
- name: Update Docs Reference Section
run: python docs/build_reference.py
- name: Commit and Push Reference Section Changes
@@ -336,6 +336,13 @@ jobs:
channels: conda-forge,defaults
channel-priority: true
activate-environment: anaconda-client-env
- name: Cleanup toolcache
run: |
echo "Free space before deletion:"
df -h /
rm -rf /opt/hostedtoolcache
echo "Free space after deletion:"
df -h /
- name: Install Linux packages
run: |
# Fix cv2 ImportError: 'libEGL.so.1: cannot open shared object file: No such file or directory'
@@ -361,6 +368,7 @@ jobs:
yolo val model=yolov8n.pt data=coco8.yaml imgsz=32
yolo export model=yolov8n.pt format=torchscript imgsz=160
- name: Test Python
# Note this step must use the updated default bash environment, not a python environment
run: |
python -c "
from ultralytics import YOLO

@@ -1,86 +0,0 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Pre-commit hooks. For more information see https://github.com/pre-commit/pre-commit-hooks/blob/main/README.md
# Optionally remove from local hooks with 'rm .git/hooks/pre-commit'
# Define bot property if installed via https://github.com/marketplace/pre-commit-ci
ci:
autofix_prs: true
autoupdate_commit_msg: "[pre-commit.ci] pre-commit suggestions"
autoupdate_schedule: monthly
submodules: true
# Exclude directories (optional)
# exclude: 'docs/'
# Define repos to run
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- id: check-case-conflict
# - id: check-yaml
- id: check-docstring-first
- id: detect-private-key
- repo: https://github.com/asottile/pyupgrade
rev: v3.15.0
hooks:
- id: pyupgrade
name: Upgrade code
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.11
hooks:
- id: ruff
args: [--fix]
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.17
hooks:
- id: mdformat
name: MD formatting
additional_dependencies:
- mdformat-gfm
- mdformat-frontmatter
- mdformat-mkdocs
args:
- --wrap=no
- --number
exclude: 'docs/.*\.md'
# exclude: "README.md|README.zh-CN.md|CONTRIBUTING.md"
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
hooks:
- id: codespell
exclude: "docs/de|docs/fr|docs/pt|docs/es|docs/mkdocs_de.yml"
args:
- --ignore-words-list=crate,nd,ned,strack,dota,ane,segway,fo,gool,winn,commend,bloc,nam,afterall
- repo: https://github.com/hadialqattan/pycln
rev: v2.4.0
hooks:
- id: pycln
args: [--all]
#
# - repo: https://github.com/PyCQA/docformatter
# rev: v1.7.5
# hooks:
# - id: docformatter
# - repo: https://github.com/asottile/yesqa
# rev: v1.4.0
# hooks:
# - id: yesqa
# - repo: https://github.com/asottile/dead
# rev: v1.5.0
# hooks:
# - id: dead
# - repo: https://github.com/ultralytics/pre-commit
# rev: bd60a414f80a53fb8f593d3bfed4701fc47e4b23
# hooks:
# - id: capitalize-comments

@@ -4,9 +4,12 @@ description: Learn how to contribute to Ultralytics YOLO open-source repositorie
keywords: Ultralytics, YOLO, open-source, contribution, pull request, code of conduct, bug reporting, GitHub, CLA, Google-style docstrings
---
# Contributing to Ultralytics Open-Source YOLO Repositories
# Contributing to Ultralytics Open-Source Projects
Thank you for your interest in contributing to Ultralytics open-source YOLO repositories! Your contributions will enhance the project and benefit the entire community. This document provides guidelines and best practices to help you get started.
Welcome! We're thrilled that you're considering contributing to our [Ultralytics](https://ultralytics.com) [open-source](https://github.com/ultralytics) projects. Your involvement not only helps enhance the quality of our repositories but also benefits the entire community. This guide provides clear guidelines and best practices to help you get started.
<a href="https://github.com/ultralytics/ultralytics/graphs/contributors">
<img width="100%" src="https://github.com/ultralytics/assets/raw/main/im/image-contributors.png" alt="Ultralytics open-source contributors"></a>
## Table of Contents
@@ -22,29 +25,29 @@ Thank you for your interest in contributing to Ultralytics open-source YOLO repo
## Code of Conduct
All contributors must adhere to the [Code of Conduct](https://docs.ultralytics.com/help/code_of_conduct) to ensure a welcoming and inclusive environment for everyone.
To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code_of_conduct). Respect, kindness, and professionalism are at the heart of our community.
## Contributing via Pull Requests
We welcome contributions in the form of pull requests. To streamline the review process, please follow these guidelines:
We greatly appreciate contributions in the form of pull requests. To make the review process as smooth as possible, please follow these steps:
1. **[Fork the repository](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo)**: Fork the Ultralytics YOLO repository to your GitHub account.
1. **[Fork the repository](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo):** Start by forking the Ultralytics YOLO repository to your GitHub account.
2. **[Create a branch](https://docs.github.com/en/desktop/making-changes-in-a-branch/managing-branches-in-github-desktop)**: Create a new branch in your forked repository with a descriptive name for your changes.
2. **[Create a branch](https://docs.github.com/en/desktop/making-changes-in-a-branch/managing-branches-in-github-desktop):** Create a new branch in your forked repository with a clear, descriptive name that reflects your changes.
3. **Make your changes**: Ensure that your changes follow the project's coding style and do not introduce new errors or warnings.
3. **Make your changes:** Ensure your code adheres to the project's style guidelines and does not introduce any new errors or warnings.
4. **[Test your changes](https://github.com/ultralytics/ultralytics/tree/main/tests)**: Test your changes locally to ensure they work as expected and do not introduce new issues.
4. **[Test your changes](https://github.com/ultralytics/ultralytics/tree/main/tests):** Before submitting, test your changes locally to confirm they work as expected and don't cause any new issues.
5. **[Commit your changes](https://docs.github.com/en/desktop/making-changes-in-a-branch/committing-and-reviewing-changes-to-your-project-in-github-desktop)**: Commit your changes with a descriptive commit message. Include any relevant issue numbers in your commit message.
5. **[Commit your changes](https://docs.github.com/en/desktop/making-changes-in-a-branch/committing-and-reviewing-changes-to-your-project-in-github-desktop):** Commit your changes with a concise and descriptive commit message. If your changes address a specific issue, include the issue number in your commit message.
6. **[Create a pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request)**: Create a pull request from your forked repository to the main Ultralytics YOLO repository. Provide a clear explanation of your changes and how they improve the project.
6. **[Create a pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request):** Submit a pull request from your forked repository to the main Ultralytics YOLO repository. Provide a clear and detailed explanation of your changes and how they improve the project.
### CLA Signing
Before we can accept your pull request, you must sign a [Contributor License Agreement (CLA)](https://docs.ultralytics.com/help/CLA). This legal document ensures that your contributions are properly licensed and that the project can continue to be distributed under the AGPL-3.0 license.
Before we can merge your pull request, you must sign our [Contributor License Agreement (CLA)](https://docs.ultralytics.com/help/CLA). This legal agreement ensures that your contributions are properly licensed, allowing the project to continue being distributed under the AGPL-3.0 license.
To sign the CLA, follow the instructions provided by the CLA bot after you submit your PR and add a comment in your PR saying:
After submitting your pull request, the CLA bot will guide you through the signing process. To sign the CLA, simply add a comment in your PR stating:
```
I have read the CLA Document and I sign the CLA
@@ -52,20 +55,20 @@ I have read the CLA Document and I sign the CLA
### Google-Style Docstrings
When adding new functions or classes, include a [Google-style docstring](https://google.github.io/styleguide/pyguide.html) to provide clear and concise documentation for other developers. This helps ensure your contributions are easy to understand and maintain.
When adding new functions or classes, please include [Google-style docstrings](https://google.github.io/styleguide/pyguide.html). These docstrings provide clear, standardized documentation that helps other developers understand and maintain your code.
#### Google-style
#### Example
This example shows a Google-style docstring. Note that both input and output `types` must always be enclosed by parentheses, i.e., `(bool)`.
This example illustrates a Google-style docstring. Ensure that both input and output `types` are always enclosed in parentheses, e.g., `(bool)`.
```python
def example_function(arg1, arg2=4):
"""
Example function that demonstrates Google-style docstrings.
Example function demonstrating Google-style docstrings.
Args:
arg1 (int): The first argument.
arg2 (int): The second argument. Default value is 4.
arg2 (int): The second argument, with a default value of 4.
Returns:
(bool): True if successful, False otherwise.
@@ -78,18 +81,18 @@ def example_function(arg1, arg2=4):
return False
```
#### Google-style with type hints
#### Example with type hints
This example shows both a Google-style docstring and argument and return type hints, though both are not required; one can be used without the other.
This example includes both a Google-style docstring and type hints for arguments and returns, though using either independently is also acceptable.
```python
def example_function(arg1: int, arg2: int = 4) -> bool:
"""
Example function that demonstrates Google-style docstrings.
Example function demonstrating Google-style docstrings.
Args:
arg1: The first argument.
arg2: The second argument. Default value is 4.
arg2: The second argument, with a default value of 4.
Returns:
True if successful, False otherwise.
@@ -102,59 +105,61 @@ def example_function(arg1: int, arg2: int = 4) -> bool:
return False
```
#### Single-line
#### Example Single-line
Smaller or simpler functions can utilize a single-line docstring. Note the docstring must use 3 double-quotes and be a complete sentence starting with a capital letter and ending with a period.
For smaller or simpler functions, a single-line docstring may be sufficient. The docstring must use three double-quotes, be a complete sentence, start with a capital letter, and end with a period.
```python
def example_small_function(arg1: int, arg2: int = 4) -> bool:
"""Example function that demonstrates a single-line docstring."""
"""Example function with a single-line docstring."""
return arg1 == arg2
```
### GitHub Actions CI Tests
Before your pull request can be merged, all GitHub Actions [Continuous Integration](https://docs.ultralytics.com/help/CI) (CI) tests must pass. These tests include linting, unit tests, and other checks to ensure your changes meet the project's quality standards. Review the output of the GitHub Actions and fix any issues.
All pull requests must pass the GitHub Actions [Continuous Integration](https://docs.ultralytics.com/help/CI) (CI) tests before they can be merged. These tests include linting, unit tests, and other checks to ensure that your changes meet the project's quality standards. Review the CI output and address any issues that arise.
## Reporting Bugs
We appreciate bug reports as they play a crucial role in maintaining the project's quality. When reporting bugs, it is important to provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example): a clear, concise code example that replicates the issue. This helps in quick identification and resolution of the bug.
We highly value bug reports as they help us maintain the quality of our projects. When reporting a bug, please provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example)—a simple, clear code example that consistently reproduces the issue. This allows us to quickly identify and resolve the problem.
## License
Ultralytics embraces the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) for its repositories, promoting openness, transparency, and collaborative enhancement in software development. This strong copyleft license ensures that all users and developers retain the freedom to use, modify, and share the software. It fosters community collaboration, ensuring that any improvements remain accessible to all.
Ultralytics uses the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) for its repositories. This license promotes openness, transparency, and collaborative improvement in software development. It ensures that all users have the freedom to use, modify, and share the software, fostering a strong community of collaboration and innovation.
Users and developers are encouraged to familiarize themselves with the terms of AGPL-3.0 to contribute effectively and ethically to the Ultralytics open-source community.
We encourage all contributors to familiarize themselves with the terms of the AGPL-3.0 license to contribute effectively and ethically to the Ultralytics open-source community.
## Conclusion
Thank you for your interest in contributing to [Ultralytics open-source](https://github.com/ultralytics) YOLO projects. Your participation is crucial in shaping the future of our software and fostering a community of innovation and collaboration. Whether you're improving code, reporting bugs, or suggesting features, your contributions make a significant impact.
Thank you for your interest in contributing to [Ultralytics](https://ultralytics.com) [open-source](https://github.com/ultralytics) YOLO projects. Your participation is essential in shaping the future of our software and building a vibrant community of innovation and collaboration. Whether you're enhancing code, reporting bugs, or suggesting new features, your contributions are invaluable.
We look forward to seeing your ideas in action and appreciate your commitment to advancing object detection technology. Let's continue to grow and innovate together in this exciting open-source journey. Happy coding! 🚀🌟
We're excited to see your ideas come to life and appreciate your commitment to advancing object detection technology. Together, let's continue to grow and innovate in this exciting open-source journey. Happy coding! 🚀🌟
## FAQ
### Why should I contribute to Ultralytics YOLO open-source repositories?
Contributing to Ultralytics YOLO open-source repositories helps improve the software, making it more robust and feature-rich for the entire community. Contributions can include code enhancements, bug fixes, documentation improvements, and new feature implementations. Additionally, contributing offers the chance to collaborate with other skilled developers and experts in the field, boosting your own skills and reputation. For information on how to get started, refer to the [Contributing via Pull Requests](#contributing-via-pull-requests) section.
Contributing to Ultralytics YOLO open-source repositories improves the software, making it more robust and feature-rich for the entire community. Contributions can include code enhancements, bug fixes, documentation improvements, and new feature implementations. Additionally, contributing allows you to collaborate with other skilled developers and experts in the field, enhancing your own skills and reputation. For details on how to get started, refer to the [Contributing via Pull Requests](#contributing-via-pull-requests) section.
### How do I sign the Contributor License Agreement (CLA) for Ultralytics YOLO?
To sign the Contributor License Agreement (CLA), follow the instructions provided by the CLA bot after submitting your pull request. This will ensure your contributions are properly licensed under the AGPL-3.0 license, maintaining the legal integrity of the open-source project. Add a comment in your pull request mentioning:
To sign the Contributor License Agreement (CLA), follow the instructions provided by the CLA bot after submitting your pull request. This process ensures that your contributions are properly licensed under the AGPL-3.0 license, maintaining the legal integrity of the open-source project. Add a comment in your pull request stating:
```
I have read the CLA Document and I sign the CLA
I have read the CLA Document and I sign the CLA.
```
For more information, see the [CLA Signing](#cla-signing) section.
### What are Google-style docstrings and why are they required for Ultralytics YOLO contributions?
### What are Google-style docstrings, and why are they required for Ultralytics YOLO contributions?
Google-style docstrings provide clear and concise documentation for functions and classes, enhancing readability and maintainability of the code. These docstrings outline the function's purpose, arguments, and return values with specific formatting rules. When contributing to Ultralytics YOLO, adhering to Google-style docstrings ensures that your additions are comprehensible and well-documented. For examples and guidelines, visit the [Google-Style Docstrings](#google-style-docstrings) section.
Google-style docstrings provide clear, concise documentation for functions and classes, improving code readability and maintainability. These docstrings outline the function's purpose, arguments, and return values with specific formatting rules. When contributing to Ultralytics YOLO, following Google-style docstrings ensures that your additions are well-documented and easily understood. For examples and guidelines, visit the [Google-Style Docstrings](#google-style-docstrings) section.
### How can I ensure my changes pass the GitHub Actions CI tests?
Before your pull request is merged, it must pass all GitHub Actions Continuous Integration (CI) tests. These tests include linting, unit tests, and other checks to ensure the code meets the project's quality standards. Review the output of the GitHub Actions and address any issues. For detailed information on the CI process and troubleshooting tips, see the [GitHub Actions CI Tests](#github-actions-ci-tests) section.
Before your pull request can be merged, it must pass all GitHub Actions Continuous Integration (CI) tests. These tests include linting, unit tests, and other checks to ensure the code meets the project's quality standards. Review the CI output and fix any issues. For detailed information on the CI process and troubleshooting tips, see the [GitHub Actions CI Tests](#github-actions-ci-tests) section.
### How do I report a bug in Ultralytics YOLO repositories?

@@ -265,7 +265,7 @@ We love your input! YOLOv5 and YOLOv8 would not be possible without help from ou
<!-- SVG image from https://opencollective.com/ultralytics/contributors.svg?width=990 -->
<a href="https://github.com/ultralytics/yolov5/graphs/contributors">
<a href="https://github.com/ultralytics/ultralytics/graphs/contributors">
<img width="100%" src="https://github.com/ultralytics/assets/raw/main/im/image-contributors.png" alt="Ultralytics open-source contributors"></a>
## <div align="center">License</div>

@@ -267,7 +267,7 @@ Ultralytics 提供了 YOLOv8 的交互式笔记本,涵盖训练、验证、跟
<!-- SVG image from https://opencollective.com/ultralytics/contributors.svg?width=990 -->
<a href="https://github.com/ultralytics/yolov5/graphs/contributors">
<a href="https://github.com/ultralytics/ultralytics/graphs/contributors">
<img width="100%" src="https://github.com/ultralytics/assets/raw/main/im/image-contributors.png" alt="Ultralytics open-source contributors"></a>
## <div align="center">许可证</div>

@@ -123,7 +123,7 @@ def update_markdown_files(md_filepath: Path):
content = content.replace("‘", "'").replace("’", "'")
# Add frontmatter if missing
if not content.strip().startswith("---\n"):
if not content.strip().startswith("---\n") and "macros" not in md_filepath.parts: # skip macros directory
header = "---\ncomments: true\ndescription: TODO ADD DESCRIPTION\nkeywords: TODO ADD KEYWORDS\n---\n\n"
content = header + content
@@ -153,6 +153,8 @@ def update_markdown_files(md_filepath: Path):
def update_docs_html():
"""Updates titles, edit links, head sections, and converts plaintext links in HTML documentation."""
# Update 404 titles
update_page_title(SITE / "404.html", new_title="Ultralytics Docs - Not Found")
# Update edit links
@@ -178,6 +180,12 @@ def update_docs_html():
if any(script):
update_html_head(script)
# Delete the /macros directory from the built site
macros_dir = SITE / "macros"
if macros_dir.exists():
print(f"Removing /macros directory from site: {macros_dir}")
shutil.rmtree(macros_dir)
def convert_plaintext_links_to_html(content):
"""Convert plaintext links to HTML hyperlinks in the main content area only."""
@@ -205,6 +213,32 @@ def convert_plaintext_links_to_html(content):
return str(soup) if modified else content
def remove_macros():
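"""Removes the /macros directory and related sitemap entries from the built site."""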
# Delete the /macros directory and sitemap.xml.gz from the built site
shutil.rmtree(SITE / "macros", ignore_errors=True)
(SITE / "sitemap.xml.gz").unlink(missing_ok=True)
# Process sitemap.xml
sitemap = SITE / "sitemap.xml"
lines = sitemap.read_text(encoding="utf-8").splitlines(keepends=True)
# Find indices of '/macros/' lines
macros_indices = [i for i, line in enumerate(lines) if "/macros/" in line]
# Create a set of indices to remove (including lines before and after)
indices_to_remove = set()
for i in macros_indices:
indices_to_remove.update(range(i - 1, i + 4)) # i-1, i, i+1, i+2, i+3
# Create new list of lines, excluding the ones to remove
new_lines = [line for i, line in enumerate(lines) if i not in indices_to_remove]
# Write the cleaned content back to the file
sitemap.write_text("".join(new_lines), encoding="utf-8")
print(f"Removed {len(macros_indices)} URLs containing '/macros/' from {sitemap}")
def main():
"""Builds docs, updates titles and edit links, and prints local server command."""
prepare_docs_markdown()
@@ -212,6 +246,7 @@ def main():
# Build the main documentation
print(f"Building docs from {DOCS}")
subprocess.run(f"mkdocs build -f {DOCS.parent}/mkdocs.yml --strict", check=True, shell=True)
remove_macros()
print(f"Site built at {SITE}")
# Update docs HTML pages

@@ -99,7 +99,7 @@ If you use the LVIS dataset in your research or development work, please cite th
```bibtex
@inproceedings{gupta2019lvis,
title={{LVIS}: A Dataset for Large Vocabulary Instance Segmentation},
title={LVIS: A Dataset for Large Vocabulary Instance Segmentation},
author={Gupta, Agrim and Dollar, Piotr and Girshick, Ross},
booktitle={Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
year={2019}

@@ -4,9 +4,12 @@ description: Learn how to contribute to Ultralytics YOLO open-source repositorie
keywords: Ultralytics, YOLO, open-source, contribution, pull request, code of conduct, bug reporting, GitHub, CLA, Google-style docstrings
---
# Contributing to Ultralytics Open-Source YOLO Repositories
# Contributing to Ultralytics Open-Source Projects
Thank you for your interest in contributing to Ultralytics open-source YOLO repositories! Your contributions will enhance the project and benefit the entire community. This document provides guidelines and best practices to help you get started.
Welcome! We're thrilled that you're considering contributing to our [Ultralytics](https://ultralytics.com) [open-source](https://github.com/ultralytics) projects. Your involvement not only helps enhance the quality of our repositories but also benefits the entire community. This guide provides clear guidelines and best practices to help you get started.
<a href="https://github.com/ultralytics/ultralytics/graphs/contributors">
<img width="100%" src="https://github.com/ultralytics/assets/raw/main/im/image-contributors.png" alt="Ultralytics open-source contributors"></a>
## Table of Contents
@@ -22,29 +25,29 @@ Thank you for your interest in contributing to Ultralytics open-source YOLO repo
## Code of Conduct
All contributors must adhere to the [Code of Conduct](https://docs.ultralytics.com/help/code_of_conduct) to ensure a welcoming and inclusive environment for everyone.
To ensure a welcoming and inclusive environment for everyone, all contributors must adhere to our [Code of Conduct](https://docs.ultralytics.com/help/code_of_conduct). Respect, kindness, and professionalism are at the heart of our community.
## Contributing via Pull Requests
We welcome contributions in the form of pull requests. To streamline the review process, please follow these guidelines:
We greatly appreciate contributions in the form of pull requests. To make the review process as smooth as possible, please follow these steps:
1. **[Fork the repository](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo)**: Fork the Ultralytics YOLO repository to your GitHub account.
1. **[Fork the repository](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo):** Start by forking the Ultralytics YOLO repository to your GitHub account.
2. **[Create a branch](https://docs.github.com/en/desktop/making-changes-in-a-branch/managing-branches-in-github-desktop)**: Create a new branch in your forked repository with a descriptive name for your changes.
2. **[Create a branch](https://docs.github.com/en/desktop/making-changes-in-a-branch/managing-branches-in-github-desktop):** Create a new branch in your forked repository with a clear, descriptive name that reflects your changes.
3. **Make your changes**: Ensure that your changes follow the project's coding style and do not introduce new errors or warnings.
3. **Make your changes:** Ensure your code adheres to the project's style guidelines and does not introduce any new errors or warnings.
4. **[Test your changes](https://github.com/ultralytics/ultralytics/tree/main/tests)**: Test your changes locally to ensure they work as expected and do not introduce new issues.
4. **[Test your changes](https://github.com/ultralytics/ultralytics/tree/main/tests):** Before submitting, test your changes locally to confirm they work as expected and don't cause any new issues.
5. **[Commit your changes](https://docs.github.com/en/desktop/making-changes-in-a-branch/committing-and-reviewing-changes-to-your-project-in-github-desktop)**: Commit your changes with a descriptive commit message. Include any relevant issue numbers in your commit message.
5. **[Commit your changes](https://docs.github.com/en/desktop/making-changes-in-a-branch/committing-and-reviewing-changes-to-your-project-in-github-desktop):** Commit your changes with a concise and descriptive commit message. If your changes address a specific issue, include the issue number in your commit message.
6. **[Create a pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request)**: Create a pull request from your forked repository to the main Ultralytics YOLO repository. Provide a clear explanation of your changes and how they improve the project.
6. **[Create a pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request):** Submit a pull request from your forked repository to the main Ultralytics YOLO repository. Provide a clear and detailed explanation of your changes and how they improve the project.
### CLA Signing
Before we can accept your pull request, you must sign a [Contributor License Agreement (CLA)](https://docs.ultralytics.com/help/CLA). This legal document ensures that your contributions are properly licensed and that the project can continue to be distributed under the AGPL-3.0 license.
Before we can merge your pull request, you must sign our [Contributor License Agreement (CLA)](https://docs.ultralytics.com/help/CLA). This legal agreement ensures that your contributions are properly licensed, allowing the project to continue being distributed under the AGPL-3.0 license.
To sign the CLA, follow the instructions provided by the CLA bot after you submit your PR and add a comment in your PR saying:
After submitting your pull request, the CLA bot will guide you through the signing process. To sign the CLA, simply add a comment in your PR stating:
```
I have read the CLA Document and I sign the CLA
@@ -52,22 +55,22 @@ I have read the CLA Document and I sign the CLA
### Google-Style Docstrings
When adding new functions or classes, include a [Google-style docstring](https://google.github.io/styleguide/pyguide.html) to provide clear and concise documentation for other developers. This helps ensure your contributions are easy to understand and maintain.
When adding new functions or classes, please include [Google-style docstrings](https://google.github.io/styleguide/pyguide.html). These docstrings provide clear, standardized documentation that helps other developers understand and maintain your code.
!!! Example "Example Docstrings"
=== "Google-style"
This example shows a Google-style docstring. Note that both input and output `types` must always be enclosed by parentheses, i.e., `(bool)`.
This example illustrates a Google-style docstring. Ensure that both input and output `types` are always enclosed in parentheses, e.g., `(bool)`.
```python
def example_function(arg1, arg2=4):
"""
Example function that demonstrates Google-style docstrings.
Example function demonstrating Google-style docstrings.
Args:
arg1 (int): The first argument.
arg2 (int): The second argument. Default value is 4.
arg2 (int): The second argument, with a default value of 4.
Returns:
(bool): True if successful, False otherwise.
@@ -82,16 +85,16 @@ When adding new functions or classes, include a [Google-style docstring](https:/
=== "Google-style with type hints"
This example shows both a Google-style docstring and argument and return type hints, though both are not required; one can be used without the other.
This example includes both a Google-style docstring and type hints for arguments and returns, though using either independently is also acceptable.
```python
def example_function(arg1: int, arg2: int = 4) -> bool:
"""
Example function that demonstrates Google-style docstrings.
Example function demonstrating Google-style docstrings.
Args:
arg1: The first argument.
arg2: The second argument. Default value is 4.
arg2: The second argument, with a default value of 4.
Returns:
True if successful, False otherwise.
@@ -106,57 +109,59 @@ When adding new functions or classes, include a [Google-style docstring](https:/
=== "Single-line"
Smaller or simpler functions can utilize a single-line docstring. Note the docstring must use 3 double-quotes and be a complete sentence starting with a capital letter and ending with a period.
For smaller or simpler functions, a single-line docstring may be sufficient. The docstring must use three double-quotes, be a complete sentence, start with a capital letter, and end with a period.
```python
def example_small_function(arg1: int, arg2: int = 4) -> bool:
"""Example function that demonstrates a single-line docstring."""
"""Example function with a single-line docstring."""
return arg1 == arg2
```
### GitHub Actions CI Tests
Before your pull request can be merged, all GitHub Actions [Continuous Integration](https://docs.ultralytics.com/help/CI) (CI) tests must pass. These tests include linting, unit tests, and other checks to ensure your changes meet the project's quality standards. Review the output of the GitHub Actions and fix any issues.
All pull requests must pass the GitHub Actions [Continuous Integration](https://docs.ultralytics.com/help/CI) (CI) tests before they can be merged. These tests include linting, unit tests, and other checks to ensure that your changes meet the project's quality standards. Review the CI output and address any issues that arise.
## Reporting Bugs
We appreciate bug reports as they play a crucial role in maintaining the project's quality. When reporting bugs, it is important to provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example): a clear, concise code example that replicates the issue. This helps in quick identification and resolution of the bug.
We highly value bug reports as they help us maintain the quality of our projects. When reporting a bug, please provide a [Minimum Reproducible Example](https://docs.ultralytics.com/help/minimum_reproducible_example)—a simple, clear code example that consistently reproduces the issue. This allows us to quickly identify and resolve the problem.
## License
Ultralytics embraces the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) for its repositories, promoting openness, transparency, and collaborative enhancement in software development. This strong copyleft license ensures that all users and developers retain the freedom to use, modify, and share the software. It fosters community collaboration, ensuring that any improvements remain accessible to all.
Ultralytics uses the [GNU Affero General Public License v3.0 (AGPL-3.0)](https://github.com/ultralytics/ultralytics/blob/main/LICENSE) for its repositories. This license promotes openness, transparency, and collaborative improvement in software development. It ensures that all users have the freedom to use, modify, and share the software, fostering a strong community of collaboration and innovation.
Users and developers are encouraged to familiarize themselves with the terms of AGPL-3.0 to contribute effectively and ethically to the Ultralytics open-source community.
We encourage all contributors to familiarize themselves with the terms of the AGPL-3.0 license to contribute effectively and ethically to the Ultralytics open-source community.
## Conclusion
Thank you for your interest in contributing to [Ultralytics open-source](https://github.com/ultralytics) YOLO projects. Your participation is crucial in shaping the future of our software and fostering a community of innovation and collaboration. Whether you're improving code, reporting bugs, or suggesting features, your contributions make a significant impact.
Thank you for your interest in contributing to [Ultralytics](https://ultralytics.com) [open-source](https://github.com/ultralytics) YOLO projects. Your participation is essential in shaping the future of our software and building a vibrant community of innovation and collaboration. Whether you're enhancing code, reporting bugs, or suggesting new features, your contributions are invaluable.
We look forward to seeing your ideas in action and appreciate your commitment to advancing object detection technology. Let's continue to grow and innovate together in this exciting open-source journey. Happy coding! 🚀🌟
We're excited to see your ideas come to life and appreciate your commitment to advancing object detection technology. Together, let's continue to grow and innovate in this exciting open-source journey. Happy coding! 🚀🌟
## FAQ
### Why should I contribute to Ultralytics YOLO open-source repositories?
Contributing to Ultralytics YOLO open-source repositories helps improve the software, making it more robust and feature-rich for the entire community. Contributions can include code enhancements, bug fixes, documentation improvements, and new feature implementations. Additionally, contributing offers the chance to collaborate with other skilled developers and experts in the field, boosting your own skills and reputation. For information on how to get started, refer to the [Contributing via Pull Requests](#contributing-via-pull-requests) section.
Contributing to Ultralytics YOLO open-source repositories improves the software, making it more robust and feature-rich for the entire community. Contributions can include code enhancements, bug fixes, documentation improvements, and new feature implementations. Additionally, contributing allows you to collaborate with other skilled developers and experts in the field, enhancing your own skills and reputation. For details on how to get started, refer to the [Contributing via Pull Requests](#contributing-via-pull-requests) section.
### How do I sign the Contributor License Agreement (CLA) for Ultralytics YOLO?
To sign the Contributor License Agreement (CLA), follow the instructions provided by the CLA bot after submitting your pull request. This will ensure your contributions are properly licensed under the AGPL-3.0 license, maintaining the legal integrity of the open-source project. Add a comment in your pull request mentioning:
To sign the Contributor License Agreement (CLA), follow the instructions provided by the CLA bot after submitting your pull request. This process ensures that your contributions are properly licensed under the AGPL-3.0 license, maintaining the legal integrity of the open-source project. Add a comment in your pull request stating:
```
I have read the CLA Document and I sign the CLA
I have read the CLA Document and I sign the CLA.
```
For more information, see the [CLA Signing](#cla-signing) section.
### What are Google-style docstrings and why are they required for Ultralytics YOLO contributions?
### What are Google-style docstrings, and why are they required for Ultralytics YOLO contributions?
Google-style docstrings provide clear and concise documentation for functions and classes, enhancing readability and maintainability of the code. These docstrings outline the function's purpose, arguments, and return values with specific formatting rules. When contributing to Ultralytics YOLO, adhering to Google-style docstrings ensures that your additions are comprehensible and well-documented. For examples and guidelines, visit the [Google-Style Docstrings](#google-style-docstrings) section.
Google-style docstrings provide clear, concise documentation for functions and classes, improving code readability and maintainability. These docstrings outline the function's purpose, arguments, and return values with specific formatting rules. When contributing to Ultralytics YOLO, following Google-style docstrings ensures that your additions are well-documented and easily understood. For examples and guidelines, visit the [Google-Style Docstrings](#google-style-docstrings) section.
### How can I ensure my changes pass the GitHub Actions CI tests?
Before your pull request is merged, it must pass all GitHub Actions Continuous Integration (CI) tests. These tests include linting, unit tests, and other checks to ensure the code meets the project's quality standards. Review the output of the GitHub Actions and address any issues. For detailed information on the CI process and troubleshooting tips, see the [GitHub Actions CI Tests](#github-actions-ci-tests) section.
Before your pull request can be merged, it must pass all GitHub Actions Continuous Integration (CI) tests. These tests include linting, unit tests, and other checks to ensure the code meets the project's quality standards. Review the CI output and fix any issues. For detailed information on the CI process and troubleshooting tips, see the [GitHub Actions CI Tests](#github-actions-ci-tests) section.
### How do I report a bug in Ultralytics YOLO repositories?

@@ -102,21 +102,7 @@ After you [train a model](./models.md#train-model), you can [export it](./models
The available export formats are presented in the table below.
| Format | `format` Argument | Model | Metadata | Arguments |
| ------------------------------------------------- | ----------------- | ------------------------- | -------- | -------------------------------------------------------------------- |
| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - |
| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |
| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |
| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |
| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |
| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |
| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` |
| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz`, `batch` |
| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |
{% include "macros/export-table.md" %}
## Exciting New Features on the Way 🎉

@@ -91,21 +91,7 @@ Welcome to the Ultralytics Integrations page! This page provides an overview of
We also support a variety of model export formats for deployment in different environments. Here are the available formats:
| Format | `format` Argument | Model | Metadata | Arguments |
| ------------------------------------------------- | ----------------- | ------------------------- | -------- | -------------------------------------------------------------------- |
| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - |
| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |
| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |
| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |
| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |
| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |
| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` |
| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` |
| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |
{% include "macros/export-table.md" %}
Explore the links to learn more about each integration and how to get the most out of them with Ultralytics. See full `export` details in the [Export](../modes/export.md) page.

@@ -8,6 +8,17 @@ keywords: YOLOv8, Weights & Biases, model training, experiment tracking, Ultraly
Object detection models like [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) have become integral to many computer vision applications. However, training, evaluating, and deploying these complex models introduces several challenges. Tracking key training metrics, comparing model variants, analyzing model behavior, and detecting issues require substantial instrumentation and experiment management.
<p align="center">
<br>
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/EeDd5P4eS6A"
title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> How to use Ultralytics YOLOv8 with Weights and Biases
</p>
This guide showcases the Ultralytics YOLOv8 integration with Weights & Biases for enhanced experiment tracking, model checkpointing, and visualization of model performance. It also includes instructions for setting up the integration, training, fine-tuning, and visualizing results using Weights & Biases' interactive features.
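For reference, the integration can be toggled from Python through the Ultralytics settings; a minimal sketch, assuming `wandb` is installed and you are logged in:

```python
from ultralytics import YOLO, settings

# Enable the Weights & Biases integration in the persisted Ultralytics settings
settings.update({"wandb": True})

# Train; metrics and checkpoints are then logged to W&B automatically
model = YOLO("yolov8n.pt")
model.train(data="coco8.yaml", epochs=3, imgsz=640)
```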
## Weights & Biases

@@ -0,0 +1,14 @@
| Argument | Type | Default | Description |
| ----------- | ---------------- | --------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `format` | `str` | `'torchscript'` | Target format for the exported model, such as `'onnx'`, `'torchscript'`, `'tensorflow'`, or others, defining compatibility with various deployment environments. |
| `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. |
| `keras` | `bool` | `False` | Enables export to Keras format for TensorFlow SavedModel, providing compatibility with TensorFlow serving and APIs. |
| `optimize` | `bool` | `False` | Applies optimization for mobile devices when exporting to TorchScript, potentially reducing model size and improving performance. |
| `half` | `bool` | `False` | Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware. |
| `int8` | `bool` | `False` | Activates INT8 quantization, further compressing the model and speeding up inference with minimal accuracy loss, primarily for edge devices. |
| `dynamic` | `bool` | `False` | Allows dynamic input sizes for ONNX, TensorRT and OpenVINO exports, enhancing flexibility in handling varying image dimensions. |
| `simplify` | `bool` | `False` | Simplifies the model graph for ONNX exports with `onnxslim`, potentially improving performance and compatibility. |
| `opset` | `int` | `None` | Specifies the ONNX opset version for compatibility with different ONNX parsers and runtimes. If not set, uses the latest supported version. |
| `workspace` | `float` | `4.0` | Sets the maximum workspace size in GiB for TensorRT optimizations, balancing memory usage and performance. |
| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS) to the CoreML export, essential for accurate and efficient detection post-processing. |
| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. |
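For reference, these arguments map directly onto `model.export()` in the Python API; a minimal sketch exercising several of them (values chosen for illustration):

```python
from ultralytics import YOLO

# Load a pretrained model and export it to ONNX with dynamic input sizes
model = YOLO("yolov8n.pt")
model.export(format="onnx", imgsz=640, dynamic=True, simplify=True, opset=12)
```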

@@ -0,0 +1,15 @@
| Format | `format` Argument | Model | Metadata | Arguments |
| ------------------------------------------------- | ----------------- | ----------------------------------------------- | -------- | -------------------------------------------------------------------- |
| [PyTorch](https://pytorch.org/) | - | `{{ model_name or "yolov8n" }}.pt` | ✅ | - |
| [TorchScript](../integrations/torchscript.md) | `torchscript` | `{{ model_name or "yolov8n" }}.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |
| [ONNX](../integrations/onnx.md) | `onnx` | `{{ model_name or "yolov8n" }}.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |
| [OpenVINO](../integrations/openvino.md) | `openvino` | `{{ model_name or "yolov8n" }}_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TensorRT](../integrations/tensorrt.md) | `engine` | `{{ model_name or "yolov8n" }}.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |
| [CoreML](../integrations/coreml.md) | `coreml` | `{{ model_name or "yolov8n" }}.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |
| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `{{ model_name or "yolov8n" }}_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |
| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `{{ model_name or "yolov8n" }}.pb` | ❌ | `imgsz`, `batch` |
| [TF Lite](../integrations/tflite.md) | `tflite` | `{{ model_name or "yolov8n" }}.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `{{ model_name or "yolov8n" }}_edgetpu.tflite` | ✅ | `imgsz` |
| [TF.js](../integrations/tfjs.md) | `tfjs` | `{{ model_name or "yolov8n" }}_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `{{ model_name or "yolov8n" }}_paddle_model/` | ✅ | `imgsz`, `batch` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `{{ model_name or "yolov8n" }}_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |
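For reference, the `{{ model_name or "yolov8n" }}` expressions are standard Jinja, evaluated at build time by mkdocs-macros-plugin; a standalone sketch of the fallback behavior using the `jinja2` package directly (illustrative only):

```python
from jinja2 import Template

# Each cell falls back to "yolov8n" when no model_name is provided
row = Template('{{ model_name or "yolov8n" }}.onnx')
print(row.render())                      # -> yolov8n.onnx
print(row.render(model_name="yolov8s"))  # -> yolov8s.onnx
```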

@@ -104,7 +104,7 @@ We would like to acknowledge the YOLOv7 authors for their significant contributi
```bibtex
@article{wang2022yolov7,
title={{YOLOv7}: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors},
title={YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors},
author={Wang, Chien-Yao and Bochkovskiy, Alexey and Liao, Hong-Yuan Mark},
journal={arXiv preprint arXiv:2207.02696},
year={2022}

@@ -190,7 +190,7 @@ We would like to acknowledge the YOLOv9 authors for their significant contributi
```bibtex
@article{wang2024yolov9,
title={{YOLOv9}: Learning What You Want to Learn Using Programmable Gradient Information},
title={YOLOv9: Learning What You Want to Learn Using Programmable Gradient Information},
author={Wang, Chien-Yao and Liao, Hong-Yuan Mark},
booktitle={arXiv preprint arXiv:2402.13616},
year={2024}

@@ -88,21 +88,7 @@ Arguments such as `model`, `data`, `imgsz`, `half`, `device`, and `verbose` prov
Benchmarks will attempt to run automatically on all possible export formats below.
| Format | `format` Argument | Model | Metadata | Arguments |
| ------------------------------------------------- | ----------------- | ------------------------- | -------- | -------------------------------------------------------------------- |
| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - |
| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |
| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |
| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch`, `dynamic` |
| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |
| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |
| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |
| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` |
| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` |
| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |
{% include "macros/export-table.md" %}
See full `export` details in the [Export](../modes/export.md) page.
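For reference, such a run can also be started from Python; a minimal sketch using the `benchmark` utility (dataset, image size, and device chosen for illustration):

```python
from ultralytics.utils.benchmarks import benchmark

# Benchmark YOLOv8n across all export formats in the table above
benchmark(model="yolov8n.pt", data="coco8.yaml", imgsz=160, half=False, device="cpu")
```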

@@ -74,20 +74,7 @@ Export a YOLOv8n model to a different format like ONNX or TensorRT. See Argument
This table details the configurations and options available for exporting YOLO models to different formats. These settings are critical for optimizing the exported model's performance, size, and compatibility across various platforms and environments. Proper configuration ensures that the model is ready for deployment in the intended application with optimal efficiency.
| Argument | Type | Default | Description |
| ----------- | ---------------- | --------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `format` | `str` | `'torchscript'` | Target format for the exported model, such as `'onnx'`, `'torchscript'`, `'tensorflow'`, or others, defining compatibility with various deployment environments. |
| `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. |
| `keras` | `bool` | `False` | Enables export to Keras format for TensorFlow SavedModel, providing compatibility with TensorFlow serving and APIs. |
| `optimize` | `bool` | `False` | Applies optimization for mobile devices when exporting to TorchScript, potentially reducing model size and improving performance. |
| `half` | `bool` | `False` | Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware. |
| `int8` | `bool` | `False` | Activates INT8 quantization, further compressing the model and speeding up inference with minimal accuracy loss, primarily for edge devices. |
| `dynamic` | `bool` | `False` | Allows dynamic input sizes for ONNX, TensorRT and OpenVINO exports, enhancing flexibility in handling varying image dimensions. |
| `simplify` | `bool` | `False` | Simplifies the model graph for ONNX exports with `onnxslim`, potentially improving performance and compatibility. |
| `opset` | `int` | `None` | Specifies the ONNX opset version for compatibility with different ONNX parsers and runtimes. If not set, uses the latest supported version. |
| `workspace` | `float` | `4.0` | Sets the maximum workspace size in GiB for TensorRT optimizations, balancing memory usage and performance. |
| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS) to the CoreML export, essential for accurate and efficient detection post-processing. |
| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. |
{% include "macros/export-args.md" %}
Adjusting these parameters allows for customization of the export process to fit specific requirements, such as deployment environment, hardware constraints, and performance targets. Selecting the appropriate format and settings is essential for achieving the best balance between model size, speed, and accuracy.
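As a concrete illustration, the sketch below exports a model with a handful of the arguments from this table; the combination shown is just an example, and not every argument applies to every format:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
# Export to ONNX with dynamic input sizes, graph simplification, and a pinned opset
model.export(format="onnx", imgsz=640, dynamic=True, simplify=True, opset=12)
```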
@ -95,21 +82,7 @@ Adjusting these parameters allows for customization of the export process to fit
Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n.onnx`. Usage examples are shown for your model after export completes.
| Format | `format` Argument | Model | Metadata | Arguments |
| ------------------------------------------------- | ----------------- | ------------------------- | -------- | -------------------------------------------------------------------- |
| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - |
| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |
| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |
| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch`, `dynamic` |
| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |
| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |
| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |
| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` |
| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` |
| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |
{% include "macros/export-table.md" %}
## FAQ

@ -720,6 +720,7 @@ The `plot()` method supports various arguments to customize the output:
| `show` | `bool` | Display the annotated image directly using the default image viewer. | `False` |
| `save` | `bool` | Save the annotated image to a file specified by `filename`. | `False` |
| `filename` | `str` | Path and name of the file to save the annotated image if `save` is `True`. | `None` |
| `color_mode` | `str` | Specify the color mode, e.g., 'instance' or 'class'. | `'class'` |
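A minimal sketch combining these arguments (the image URL and output filename are illustrative):

```python
from ultralytics import YOLO

model = YOLO("yolov8n-seg.pt")
results = model("https://ultralytics.com/images/bus.jpg")
# Color boxes and masks per instance rather than per class, and save the result
results[0].plot(color_mode="instance", save=True, filename="bus_annotated.jpg")
```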
## Thread-Safe Inference

@ -2,6 +2,7 @@
comments: true
description: Master image classification using YOLOv8. Learn to train, validate, predict, and export models efficiently.
keywords: YOLOv8, image classification, AI, machine learning, pretrained models, ImageNet, model export, predict, train, validate
model_name: yolov8n-cls
---
# Image Classification
@ -165,21 +166,7 @@ Export a YOLOv8n-cls model to a different format like ONNX, CoreML, etc.
Available YOLOv8-cls export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-cls.onnx`. Usage examples are shown for your model after export completes.
| Format | `format` Argument | Model | Metadata | Arguments |
| ------------------------------------------------- | ----------------- | ----------------------------- | -------- | -------------------------------------------------------------------- |
| [PyTorch](https://pytorch.org/) | - | `yolov8n-cls.pt` | ✅ | - |
| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n-cls.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |
| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n-cls.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |
| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n-cls_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch`, `dynamic` |
| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n-cls.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |
| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n-cls.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |
| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n-cls_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |
| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n-cls.pb` | ❌ | `imgsz`, `batch` |
| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n-cls.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n-cls_edgetpu.tflite` | ✅ | `imgsz` |
| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz`, `batch` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n-cls_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |
{% include "macros/export-table.md" %}
See full `export` details in the [Export](../modes/export.md) page.
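For example, a short sketch of exporting a classification model and then predicting directly on the exported file (paths are illustrative):

```python
from ultralytics import YOLO

model = YOLO("yolov8n-cls.pt")
model.export(format="onnx")  # creates 'yolov8n-cls.onnx'

# Exported models can be loaded and used for prediction directly
onnx_model = YOLO("yolov8n-cls.onnx")
results = onnx_model("https://ultralytics.com/images/bus.jpg")
```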

@ -167,21 +167,7 @@ Export a YOLOv8n model to a different format like ONNX, CoreML, etc.
Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n.onnx`. Usage examples are shown for your model after export completes.
| Format | `format` Argument | Model | Metadata | Arguments |
| ------------------------------------------------- | ----------------- | ------------------------- | -------- | -------------------------------------------------------------------- |
| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - |
| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |
| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |
| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch`, `dynamic` |
| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |
| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |
| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |
| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` |
| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` |
| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |
{% include "macros/export-table.md" %}
See full `export` details in the [Export](../modes/export.md) page.

@ -2,6 +2,7 @@
comments: true
description: Discover how to detect objects with rotation for higher precision using YOLOv8 OBB models. Learn, train, validate, and export OBB models effortlessly.
keywords: Oriented Bounding Boxes, OBB, Object Detection, YOLOv8, Ultralytics, DOTAv1, Model Training, Model Export, AI, Machine Learning
model_name: yolov8n-obb
---
# Oriented Bounding Boxes Object Detection
@ -188,21 +189,7 @@ Export a YOLOv8n-obb model to a different format like ONNX, CoreML, etc.
Available YOLOv8-obb export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-obb.onnx`. Usage examples are shown for your model after export completes.
| Format | `format` Argument | Model | Metadata | Arguments |
| ------------------------------------------------- | ----------------- | ----------------------------- | -------- | -------------------------------------------------------------------- |
| [PyTorch](https://pytorch.org/) | - | `yolov8n-obb.pt` | ✅ | - |
| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n-obb.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |
| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n-obb.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |
| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n-obb_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch`, `dynamic` |
| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n-obb.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |
| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n-obb.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |
| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n-obb_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |
| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n-obb.pb` | ❌ | `imgsz`, `batch` |
| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n-obb.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n-obb_edgetpu.tflite` | ✅ | `imgsz` |
| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n-obb_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n-obb_paddle_model/` | ✅ | `imgsz`, `batch` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n-obb_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |
{% include "macros/export-table.md" %}
See full `export` details in the [Export](../modes/export.md) page.

@ -2,6 +2,7 @@
comments: true
description: Discover how to use YOLOv8 for pose estimation tasks. Learn about model training, validation, prediction, and exporting in various formats.
keywords: pose estimation, YOLOv8, Ultralytics, keypoints, model training, image recognition, deep learning
model_name: yolov8n-pose
---
# Pose Estimation
@ -201,21 +202,7 @@ Export a YOLOv8n Pose model to a different format like ONNX, CoreML, etc.
Available YOLOv8-pose export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-pose.onnx`. Usage examples are shown for your model after export completes.
| Format | `format` Argument | Model | Metadata | Arguments |
| ------------------------------------------------- | ----------------- | ------------------------------ | -------- | -------------------------------------------------------------------- |
| [PyTorch](https://pytorch.org/) | - | `yolov8n-pose.pt` | ✅ | - |
| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n-pose.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |
| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n-pose.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |
| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n-pose_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch`, `dynamic` |
| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n-pose.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |
| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n-pose.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |
| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n-pose_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |
| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n-pose.pb` | ❌ | `imgsz`, `batch` |
| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n-pose.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n-pose_edgetpu.tflite` | ✅ | `imgsz` |
| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz`, `batch` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n-pose_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |
{% include "macros/export-table.md" %}
See full `export` details in the [Export](../modes/export.md) page.

@ -2,6 +2,7 @@
comments: true
description: Master instance segmentation using YOLOv8. Learn how to detect, segment and outline objects in images with detailed guides and examples.
keywords: instance segmentation, YOLOv8, object detection, image segmentation, machine learning, deep learning, computer vision, COCO dataset, Ultralytics
model_name: yolov8n-seg
---
# Instance Segmentation
@ -172,21 +173,7 @@ Export a YOLOv8n-seg model to a different format like ONNX, CoreML, etc.
Available YOLOv8-seg export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`. You can predict or validate directly on exported models, i.e. `yolo predict model=yolov8n-seg.onnx`. Usage examples are shown for your model after export completes.
| Format | `format` Argument | Model | Metadata | Arguments |
| ------------------------------------------------- | ----------------- | ----------------------------- | -------- | -------------------------------------------------------------------- |
| [PyTorch](https://pytorch.org/) | - | `yolov8n-seg.pt` | ✅ | - |
| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n-seg.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |
| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n-seg.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |
| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n-seg_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch`, `dynamic` |
| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n-seg.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |
| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n-seg.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |
| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n-seg_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |
| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n-seg.pb` | ❌ | `imgsz`, `batch` |
| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n-seg.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n-seg_edgetpu.tflite` | ✅ | `imgsz` |
| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz`, `batch` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n-seg_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |
{% include "macros/export-table.md" %}
See full `export` details in the [Export](../modes/export.md) page.

@ -215,19 +215,7 @@ Careful tuning and experimentation with these settings are crucial to ensure opt
Export settings for YOLO models encompass configurations and options related to saving or exporting the model for use in different environments or platforms. These settings can impact the model's performance, size, and compatibility with various systems. Key export settings include the exported model file format (e.g., ONNX, TensorFlow SavedModel), the target device (e.g., CPU, GPU), and additional features such as masks or multiple labels per box. The export process may also be affected by the model's specific task and the requirements or constraints of the destination environment or platform.
| Argument | Type | Default | Description |
| ----------- | ---------------- | --------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `format` | `str` | `'torchscript'` | Target format for the exported model, such as `'onnx'`, `'torchscript'`, `'tensorflow'`, or others, defining compatibility with various deployment environments. |
| `imgsz` | `int` or `tuple` | `640` | Desired image size for the model input. Can be an integer for square images or a tuple `(height, width)` for specific dimensions. |
| `keras` | `bool` | `False` | Enables export to Keras format for TensorFlow SavedModel, providing compatibility with TensorFlow serving and APIs. |
| `optimize` | `bool` | `False` | Applies optimization for mobile devices when exporting to TorchScript, potentially reducing model size and improving performance. |
| `half` | `bool` | `False` | Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware. |
| `int8` | `bool` | `False` | Activates INT8 quantization, further compressing the model and speeding up inference with minimal accuracy loss, primarily for edge devices. |
| `dynamic` | `bool` | `False` | Allows dynamic input sizes for ONNX, TensorRT and OpenVINO exports, enhancing flexibility in handling varying image dimensions. |
| `simplify` | `bool` | `False` | Simplifies the model graph for ONNX exports, potentially improving performance and compatibility. |
| `opset` | `int` | `None` | Specifies the ONNX opset version for compatibility with different ONNX parsers and runtimes. If not set, uses the latest supported version. |
| `workspace` | `float` | `4.0` | Sets the maximum workspace size in GB for TensorRT optimizations, balancing memory usage and performance. |
| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS) to the CoreML export, essential for accurate and efficient detection post-processing. |
{% include "macros/export-args.md" %}
It is crucial to thoughtfully configure these settings to ensure the exported model is optimized for the intended use case and functions effectively in the target environment.

@ -169,21 +169,7 @@ Export a YOLOv8n model to a different format like ONNX, CoreML, etc.
Available YOLOv8 export formats are in the table below. You can export to any format using the `format` argument, i.e. `format='onnx'` or `format='engine'`.
| Format | `format` Argument | Model | Metadata | Arguments |
| ------------------------------------------------- | ----------------- | ------------------------- | -------- | -------------------------------------------------------------------- |
| [PyTorch](https://pytorch.org/) | - | `yolov8n.pt` | ✅ | - |
| [TorchScript](../integrations/torchscript.md) | `torchscript` | `yolov8n.torchscript` | ✅ | `imgsz`, `optimize`, `batch` |
| [ONNX](../integrations/onnx.md) | `onnx` | `yolov8n.onnx` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `opset`, `batch` |
| [OpenVINO](../integrations/openvino.md) | `openvino` | `yolov8n_openvino_model/` | ✅ | `imgsz`, `half`, `int8`, `batch`, `dynamic` |
| [TensorRT](../integrations/tensorrt.md) | `engine` | `yolov8n.engine` | ✅ | `imgsz`, `half`, `dynamic`, `simplify`, `workspace`, `int8`, `batch` |
| [CoreML](../integrations/coreml.md) | `coreml` | `yolov8n.mlpackage` | ✅ | `imgsz`, `half`, `int8`, `nms`, `batch` |
| [TF SavedModel](../integrations/tf-savedmodel.md) | `saved_model` | `yolov8n_saved_model/` | ✅ | `imgsz`, `keras`, `int8`, `batch` |
| [TF GraphDef](../integrations/tf-graphdef.md) | `pb` | `yolov8n.pb` | ❌ | `imgsz`, `batch` |
| [TF Lite](../integrations/tflite.md) | `tflite` | `yolov8n.tflite` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [TF Edge TPU](../integrations/edge-tpu.md) | `edgetpu` | `yolov8n_edgetpu.tflite` | ✅ | `imgsz` |
| [TF.js](../integrations/tfjs.md) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz`, `half`, `int8`, `batch` |
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz`, `batch` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `yolov8n_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |
{% include "macros/export-table.md" %}
See full `export` details in the [Export](../modes/export.md) page.

@ -63,7 +63,7 @@ The converted masks will be saved in the specified output directory.
```python
from ultralytics.data.converter import convert_segment_masks_to_yolo_seg
# For COCO dataset we have 80 classes
# `classes` is the total number of classes in the dataset; for the COCO dataset there are 80 classes
convert_segment_masks_to_yolo_seg(masks_dir="path/to/masks_dir", output_dir="path/to/output_dir", classes=80)
```

@ -2,7 +2,7 @@
1185102784@qq.com: Laughing-q
130829914+IvorZhu331@users.noreply.github.com: IvorZhu331
135830346+UltralyticsAssistant@users.noreply.github.com: UltralyticsAssistant
1579093407@qq.com: null
1579093407@qq.com: YOLOv5-Magic
17216799+ouphi@users.noreply.github.com: ouphi
17316848+maianumerosky@users.noreply.github.com: maianumerosky
34196005+fcakyon@users.noreply.github.com: fcakyon

@ -102,3 +102,100 @@ div.highlight {
}
}
/* Banner (same as the one on the Ultralytics website) -------------------------------------------------------------- */
/* MkDocs Ultralytics Plugin ---------------------------------------------------------------------------------------- */
.git-info,
.dates-container,
.authors-container,
.share-buttons {
display: flex;
align-items: center;
justify-content: flex-end;
flex-wrap: wrap;
}
.git-info {
font-size: 0.8em;
color: grey;
margin-bottom: 10px;
}
.dates-container,
.authors-container {
margin-bottom: 10px;
}
.date-item,
.author-link,
.share-button {
display: flex;
align-items: center;
cursor: pointer;
}
.date-item {
margin-right: 10px;
}
.hover-item {
transition: all 0.2s ease;
filter: grayscale(100%);
}
.date-item .hover-item {
font-size: 1.6em;
margin-right: 5px;
}
.author-link .hover-item {
width: 50px;
height: 50px;
border-radius: 50%;
margin-right: 3px;
}
.hover-item:hover {
transform: scale(1.2);
filter: grayscale(0%);
}
.share-buttons {
margin-top: 10px;
}
.share-button {
background-color: #1da1f2;
color: white;
padding: 6px 12px;
border-radius: 5px;
border: none;
font-size: 0.95em;
margin: 5px;
transition: all 0.2s ease;
}
.share-button:hover {
transform: scale(1.1);
filter: brightness(1.2);
}
.share-button.linkedin {
background-color: #0077b5;
}
.share-button i {
margin-right: 5px;
font-size: 1.1em;
}
@media (max-width: 1024px) {
.git-info {
flex-direction: column;
align-items: flex-end;
}
.dates-container,
.authors-container {
width: 100%;
}
}
/* MkDocs Ultralytics Plugin ---------------------------------------------------------------------------------------- */

@ -9,103 +9,94 @@ from sahi.predict import get_sliced_prediction
from sahi.utils.yolov8 import download_yolov8s_model
from ultralytics.utils.files import increment_path
from ultralytics.utils.plotting import Annotator, colors
def run(weights="yolov8n.pt", source="test.mp4", view_img=False, save_img=False, exist_ok=False):
"""
Run object detection on a video using YOLOv8 and SAHI.
Args:
weights (str): Model weights path.
source (str): Video file path.
view_img (bool): Show results.
save_img (bool): Save results.
exist_ok (bool): Overwrite existing files.
"""
# Check source path
if not Path(source).exists():
raise FileNotFoundError(f"Source path '{source}' does not exist.")
yolov8_model_path = f"models/{weights}"
download_yolov8s_model(yolov8_model_path)
detection_model = AutoDetectionModel.from_pretrained(
model_type="yolov8", model_path=yolov8_model_path, confidence_threshold=0.3, device="cpu"
)
# Video setup
videocapture = cv2.VideoCapture(source)
frame_width, frame_height = int(videocapture.get(3)), int(videocapture.get(4))
fps, fourcc = int(videocapture.get(5)), cv2.VideoWriter_fourcc(*"mp4v")
# Output setup
save_dir = increment_path(Path("ultralytics_results_with_sahi") / "exp", exist_ok)
save_dir.mkdir(parents=True, exist_ok=True)
video_writer = cv2.VideoWriter(str(save_dir / f"{Path(source).stem}.mp4"), fourcc, fps, (frame_width, frame_height))
while videocapture.isOpened():
success, frame = videocapture.read()
if not success:
break
results = get_sliced_prediction(
frame, detection_model, slice_height=512, slice_width=512, overlap_height_ratio=0.2, overlap_width_ratio=0.2
)
object_prediction_list = results.object_prediction_list
boxes_list = []
clss_list = []
for ind, _ in enumerate(object_prediction_list):
boxes = (
object_prediction_list[ind].bbox.minx,
object_prediction_list[ind].bbox.miny,
object_prediction_list[ind].bbox.maxx,
object_prediction_list[ind].bbox.maxy,
)
clss = object_prediction_list[ind].category.name
boxes_list.append(boxes)
clss_list.append(clss)
for box, cls in zip(boxes_list, clss_list):
x1, y1, x2, y2 = box
cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (56, 56, 255), 2)
label = str(cls)
t_size = cv2.getTextSize(label, 0, fontScale=0.6, thickness=1)[0]
cv2.rectangle(
frame, (int(x1), int(y1) - t_size[1] - 3), (int(x1) + t_size[0], int(y1) + 3), (56, 56, 255), -1
)
cv2.putText(
frame, label, (int(x1), int(y1) - 2), 0, 0.6, [255, 255, 255], thickness=1, lineType=cv2.LINE_AA
)
if view_img:
cv2.imshow(Path(source).stem, frame)
if save_img:
video_writer.write(frame)
class SahiInference:
def __init__(self):
self.detection_model = None
if cv2.waitKey(1) & 0xFF == ord("q"):
break
video_writer.release()
videocapture.release()
cv2.destroyAllWindows()
def parse_opt():
"""Parse command line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", type=str, default="yolov8n.pt", help="initial weights path")
parser.add_argument("--source", type=str, required=True, help="video file path")
parser.add_argument("--view-img", action="store_true", help="show results")
parser.add_argument("--save-img", action="store_true", help="save results")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
return parser.parse_args()
def load_model(self, weights):
yolov8_model_path = f"models/{weights}"
download_yolov8s_model(yolov8_model_path)
self.detection_model = AutoDetectionModel.from_pretrained(
model_type="yolov8", model_path=yolov8_model_path, confidence_threshold=0.3, device="cpu"
)
def inference(
self, weights="yolov8n.pt", source="test.mp4", view_img=False, save_img=False, exist_ok=False, track=False
):
"""
Run object detection on a video using YOLOv8 and SAHI.
Args:
weights (str): Model weights path.
source (str): Video file path.
view_img (bool): Show results.
save_img (bool): Save results.
exist_ok (bool): Overwrite existing files.
track (bool): Enable object tracking with SAHI.
"""
# Video setup
cap = cv2.VideoCapture(source)
assert cap.isOpened(), "Error reading video file"
frame_width, frame_height = int(cap.get(3)), int(cap.get(4))
# Output setup
save_dir = increment_path(Path("ultralytics_results_with_sahi") / "exp", exist_ok)
save_dir.mkdir(parents=True, exist_ok=True)
video_writer = cv2.VideoWriter(
str(save_dir / f"{Path(source).stem}.mp4"),
cv2.VideoWriter_fourcc(*"mp4v"),
int(cap.get(5)),
(frame_width, frame_height),
)
def main(opt):
"""Main function."""
run(**vars(opt))
# Load model
self.load_model(weights)
while cap.isOpened():
success, frame = cap.read()
if not success:
break
annotator = Annotator(frame) # Initialize annotator for plotting detection and tracking results
results = get_sliced_prediction(
frame,
self.detection_model,
slice_height=512,
slice_width=512,
overlap_height_ratio=0.2,
overlap_width_ratio=0.2,
)
detection_data = [
(det.category.name, det.category.id, (det.bbox.minx, det.bbox.miny, det.bbox.maxx, det.bbox.maxy))
for det in results.object_prediction_list
]
for det in detection_data:
annotator.box_label(det[2], label=str(det[0]), color=colors(int(det[1]), True))
if view_img:
cv2.imshow(Path(source).stem, frame)
if save_img:
video_writer.write(frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
video_writer.release()
cap.release()
cv2.destroyAllWindows()
def parse_opt(self):
"""Parse command line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", type=str, default="yolov8n.pt", help="initial weights path")
parser.add_argument("--source", type=str, required=True, help="video file path")
parser.add_argument("--view-img", action="store_true", help="show results")
parser.add_argument("--save-img", action="store_true", help="save results")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
return parser.parse_args()
if __name__ == "__main__":
opt = parse_opt()
main(opt)
inference = SahiInference()
inference.inference(**vars(inference.parse_opt()))
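For reference, a hypothetical programmatic use of the refactored class, assuming the script is saved as `yolov8_sahi.py` and importable as a module:

```python
# Hypothetical usage sketch; SahiInference is the class defined in this example script
from yolov8_sahi import SahiInference

app = SahiInference()
# load_model() is called internally by inference() before the video loop starts
app.inference(weights="yolov8n.pt", source="test.mp4", save_img=True)
```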

@ -614,6 +614,7 @@ nav:
# Plugins including 301 redirects navigation ---------------------------------------------------------------------------
plugins:
- macros
- search:
lang: en
- mkdocstrings:
@ -640,6 +641,7 @@ plugins:
add_authors: True
add_json_ld: True
add_share_buttons: True
add_css: False
default_image: https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png
- mkdocs-jupyter
- redirects:

@ -91,9 +91,10 @@ dev = [
"mkdocs>=1.6.0",
"mkdocs-material>=9.5.9",
"mkdocstrings[python]",
"mkdocs-jupyter", # for notebooks
"mkdocs-redirects", # for 301 redirects
"mkdocs-ultralytics-plugin>=0.0.49", # for meta descriptions and images, dates and authors
"mkdocs-jupyter", # notebooks
"mkdocs-redirects", # 301 redirects
"mkdocs-ultralytics-plugin>=0.1.2", # for meta descriptions and images, dates and authors
"mkdocs-macros-plugin>=1.0.5" # duplicating content (i.e. export tables) in multiple places
]
export = [
"onnx>=1.12.0", # ONNX export

@ -26,6 +26,7 @@ from ultralytics.utils import (
WEIGHTS_DIR,
WINDOWS,
checks,
is_github_action_running,
)
from ultralytics.utils.downloads import download
from ultralytics.utils.torch_utils import TORCH_1_9
@ -131,6 +132,7 @@ def test_predict_grey_and_4ch():
@pytest.mark.slow
@pytest.mark.skipif(not ONLINE, reason="environment is offline")
@pytest.mark.skipif(is_github_action_running(), reason="No auth https://github.com/JuanBindez/pytubefix/issues/166")
def test_youtube():
"""Test YOLO model on a YouTube video stream, handling potential network-related errors."""
model = YOLO(MODEL)

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
__version__ = "8.2.74"
__version__ = "8.2.77"
import os

@ -81,7 +81,7 @@ CLI_HELP_MSG = f"""
5. Explore your datasets using semantic search and SQL with a simple GUI powered by Ultralytics Explorer API
yolo explorer data=data.yaml model=yolov8n.pt
6. Streamlit real-time object detection on your webcam with Ultralytics YOLOv8
6. Streamlit real-time webcam inference GUI
yolo streamlit-predict
7. Run special commands:

@ -344,13 +344,13 @@ def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
Args:
masks_dir (str): The path to the directory where all mask images (png, jpg) are stored.
output_dir (str): The path to the directory where the converted YOLO segmentation masks will be stored.
classes (int): Total classes in the dataset i.e for COCO classes=80
classes (int): Total classes in the dataset i.e. for COCO classes=80
Example:
```python
from ultralytics.data.converter import convert_segment_masks_to_yolo_seg
# for coco dataset, we have 80 classes
# `classes` is the total number of classes in the dataset; for the COCO dataset there are 80 classes
convert_segment_masks_to_yolo_seg('path/to/masks_directory', 'path/to/output/directory', classes=80)
```
@ -373,7 +373,7 @@ def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
"""
import os
pixel_to_class_mapping = {i + 1: i for i in range(80)}
pixel_to_class_mapping = {i + 1: i for i in range(classes)}
for mask_filename in os.listdir(masks_dir):
if mask_filename.endswith(".png"):
mask_path = os.path.join(masks_dir, mask_filename)

@ -144,7 +144,7 @@ def get_window_obj(anno, windows, iof_thr=0.7):
return [np.zeros((0, 9), dtype=np.float32) for _ in range(len(windows))] # window_anns
def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
def crop_and_save(anno, windows, window_objs, im_dir, lb_dir, allow_background_images=True):
"""
Crop images and save new labels.
@ -154,6 +154,7 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
window_objs (list): A list of labels inside each window.
im_dir (str): The output directory path of images.
lb_dir (str): The output directory path of labels.
allow_background_images (bool): Whether to include background images without labels.
Notes:
The directory structure assumed for the DOTA dataset:
@ -173,19 +174,19 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
patch_im = im[y_start:y_stop, x_start:x_stop]
ph, pw = patch_im.shape[:2]
cv2.imwrite(str(Path(im_dir) / f"{new_name}.jpg"), patch_im)
label = window_objs[i]
if len(label) == 0:
continue
label[:, 1::2] -= x_start
label[:, 2::2] -= y_start
label[:, 1::2] /= pw
label[:, 2::2] /= ph
with open(Path(lb_dir) / f"{new_name}.txt", "w") as f:
for lb in label:
formatted_coords = ["{:.6g}".format(coord) for coord in lb[1:]]
f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")
if len(label) or allow_background_images:
cv2.imwrite(str(Path(im_dir) / f"{new_name}.jpg"), patch_im)
if len(label):
label[:, 1::2] -= x_start
label[:, 2::2] -= y_start
label[:, 1::2] /= pw
label[:, 2::2] /= ph
with open(Path(lb_dir) / f"{new_name}.txt", "w") as f:
for lb in label:
formatted_coords = ["{:.6g}".format(coord) for coord in lb[1:]]
f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")
def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=(1024,), gaps=(200,)):
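As a usage sketch, the public split helpers call `crop_and_save` internally; for example, assuming `split_trainval` from `ultralytics.data.split_dota` and illustrative paths:

```python
from ultralytics.data.split_dota import split_trainval

# Split DOTA train/val images into 1024x1024 patches with 200 px overlap;
# with allow_background_images=True (the new default), label-free patches are kept
split_trainval(data_root="path/to/DOTAv1", save_dir="path/to/DOTAv1-split", crop_size=1024, gap=200)
```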

@ -460,6 +460,7 @@ class Results(SimpleClass):
show=False,
save=False,
filename=None,
color_mode="class",
):
"""
Plots detection results on an input RGB image.
@ -481,6 +482,7 @@ class Results(SimpleClass):
show (bool): Whether to display the annotated image.
save (bool): Whether to save the annotated image.
filename (str | None): Filename to save image if save is True.
color_mode (str): Specify the color mode, e.g., 'instance' or 'class'. Defaults to 'class'.
Returns:
(np.ndarray): Annotated image as a numpy array.
@ -491,6 +493,7 @@ class Results(SimpleClass):
... im = result.plot()
... im.show()
"""
assert color_mode in {"instance", "class"}, f"Expected color_mode='instance' or 'class', not {color_mode}."
if img is None and isinstance(self.orig_img, torch.Tensor):
img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy()
@ -519,17 +522,22 @@ class Results(SimpleClass):
.contiguous()
/ 255
)
idx = pred_boxes.cls if pred_boxes else range(len(pred_masks))
idx = pred_boxes.cls if pred_boxes and color_mode == "class" else reversed(range(len(pred_masks)))
annotator.masks(pred_masks.data, colors=[colors(x, True) for x in idx], im_gpu=im_gpu)
# Plot Detect results
if pred_boxes is not None and show_boxes:
for d in reversed(pred_boxes):
for i, d in enumerate(reversed(pred_boxes)):
c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
name = ("" if id is None else f"id:{id} ") + names[c]
label = (f"{name} {conf:.2f}" if conf else name) if labels else None
box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze()
annotator.box_label(box, label, color=colors(c, True), rotated=is_obb)
annotator.box_label(
box,
label,
color=colors(i if color_mode == "instance" else c, True),
rotated=is_obb,
)
# Plot Classify results
if pred_probs is not None and show_probs:
@ -539,8 +547,14 @@ class Results(SimpleClass):
# Plot Pose results
if self.keypoints is not None:
for k in reversed(self.keypoints.data):
annotator.kpts(k, self.orig_shape, radius=kpt_radius, kpt_line=kpt_line)
for i, k in enumerate(reversed(self.keypoints.data)):
annotator.kpts(
k,
self.orig_shape,
radius=kpt_radius,
kpt_line=kpt_line,
kpt_color=colors(i, True) if color_mode == "instance" else None,
)
# Show results
if show:

@ -174,9 +174,11 @@ class BaseTrainer:
world_size = len(self.args.device.split(","))
elif isinstance(self.args.device, (tuple, list)): # i.e. device=[0, 1, 2, 3] (multi-GPU from CLI is list)
world_size = len(self.args.device)
elif self.args.device in {"cpu", "mps"}: # i.e. device='cpu' or 'mps'
world_size = 0
elif torch.cuda.is_available(): # i.e. device=None or device='' or device=number
world_size = 1 # default to device 0
else: # i.e. device='cpu' or 'mps'
else: # i.e. device=None or device=''
world_size = 0
# Run subprocess if DDP training, else train normally
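A hypothetical standalone helper that mirrors this device-to-world-size resolution, shown only to make the branching explicit (not part of the trainer API):

```python
def resolve_world_size(device, cuda_available: bool) -> int:
    """Hypothetical mirror of the BaseTrainer device-to-world_size logic above."""
    if isinstance(device, str) and "," in device:  # e.g. device='0,1,2,3'
        return len(device.split(","))
    if isinstance(device, (tuple, list)):  # e.g. device=[0, 1, 2, 3] from the CLI
        return len(device)
    if device in {"cpu", "mps"}:  # explicitly non-CUDA devices never launch DDP
        return 0
    return 1 if cuda_available else 0  # device=None, '' or a number: one GPU if available
```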

@ -60,7 +60,7 @@ class DetectionTrainer(BaseTrainer):
if self.args.multi_scale:
imgs = batch["img"]
sz = (
random.randrange(self.args.imgsz * 0.5, self.args.imgsz * 1.5 + self.stride)
random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1.5 + self.stride))
// self.stride
* self.stride
) # size
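The `int()` casts matter because `random.randrange` only accepts integer endpoints: float arguments were deprecated and raise an error on recent Python versions. A minimal sketch of the fixed computation with illustrative values:

```python
import random

imgsz, stride = 640, 32
# Pick a random size in [0.5*imgsz, 1.5*imgsz + stride), then floor to a multiple of stride
sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5 + stride)) // stride * stride
```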

@ -566,10 +566,6 @@ class AutoBackend(nn.Module):
y = [y]
elif self.pb: # GraphDef
y = self.frozen_func(x=self.tf.constant(im))
if (self.task == "segment" or len(y) == 2) and len(self.names) == 999: # segments and names not defined
ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0) # index of protos, boxes
nc = y[ib].shape[1] - y[ip].shape[3] - 4 # y = (1, 160, 160, 32), (1, 116, 8400)
self.names = {i: f"class{i}" for i in range(nc)}
else: # Lite or Edge TPU
details = self.input_details[0]
is_int = details["dtype"] in {np.int8, np.int16} # is TFLite quantized int8 or int16 model
@ -607,6 +603,10 @@ class AutoBackend(nn.Module):
# for x in y:
# print(type(x), len(x)) if isinstance(x, (list, tuple)) else print(type(x), x.shape) # debug shapes
if isinstance(y, (list, tuple)):
if len(self.names) == 999 and (self.task == "segment" or len(y) == 2): # segments and names not defined
ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0) # index of protos, boxes
nc = y[ib].shape[1] - y[ip].shape[3] - 4 # y = (1, 160, 160, 32), (1, 116, 8400)
self.names = {i: f"class{i}" for i in range(nc)}
return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
else:
return self.from_numpy(y)

@ -15,6 +15,11 @@ class TrackState:
Tracked (int): State when the object is successfully tracked in subsequent frames.
Lost (int): State when the object is no longer tracked.
Removed (int): State when the object is removed from tracking.
Examples:
>>> state = TrackState.New
>>> if state == TrackState.New:
>>> print("Object is newly detected.")
"""
New = 0
@ -33,13 +38,13 @@ class BaseTrack:
is_activated (bool): Flag indicating whether the track is currently active.
state (TrackState): Current state of the track.
history (OrderedDict): Ordered history of the track's states.
features (list): List of features extracted from the object for tracking.
curr_feature (any): The current feature of the object being tracked.
features (List): List of features extracted from the object for tracking.
curr_feature (Any): The current feature of the object being tracked.
score (float): The confidence score of the tracking.
start_frame (int): The frame number where tracking started.
frame_id (int): The most recent frame ID processed by the track.
time_since_update (int): Frames passed since the last update.
location (tuple): The location of the object in the context of multi-camera tracking.
location (Tuple): The location of the object in the context of multi-camera tracking.
Methods:
end_frame: Returns the ID of the last frame where the object was tracked.
@ -50,12 +55,26 @@ class BaseTrack:
mark_lost: Marks the track as lost.
mark_removed: Marks the track as removed.
reset_id: Resets the global track ID counter.
Examples:
Initialize a new track and mark it as lost:
>>> track = BaseTrack()
>>> track.mark_lost()
>>> print(track.state) # Output: 2 (TrackState.Lost)
"""
_count = 0
def __init__(self):
"""Initializes a new track with unique ID and foundational tracking attributes."""
"""
Initializes a new track with a unique ID and foundational tracking attributes.
Examples:
Initialize a new track
>>> track = BaseTrack()
>>> print(track.track_id)
0
"""
self.track_id = 0
self.is_activated = False
self.state = TrackState.New
@ -70,36 +89,36 @@ class BaseTrack:
@property
def end_frame(self):
"""Return the last frame ID of the track."""
"""Returns the ID of the most recent frame where the object was tracked."""
return self.frame_id
@staticmethod
def next_id():
"""Increment and return the global track ID counter."""
"""Increment and return the next unique global track ID for object tracking."""
BaseTrack._count += 1
return BaseTrack._count
def activate(self, *args):
"""Abstract method to activate the track with provided arguments."""
"""Activates the track with provided arguments, initializing necessary attributes for tracking."""
raise NotImplementedError
def predict(self):
"""Abstract method to predict the next state of the track."""
"""Predicts the next state of the track based on the current state and tracking model."""
raise NotImplementedError
def update(self, *args, **kwargs):
"""Abstract method to update the track with new observations."""
"""Updates the track with new observations and data, modifying its state and attributes accordingly."""
raise NotImplementedError
def mark_lost(self):
"""Mark the track as lost."""
"""Marks the track as lost by updating its state to TrackState.Lost."""
self.state = TrackState.Lost
def mark_removed(self):
"""Mark the track as removed."""
"""Marks the track as removed by setting its state to TrackState.Removed."""
self.state = TrackState.Removed
@staticmethod
def reset_id():
"""Reset the global track ID counter."""
"""Reset the global track ID counter to its initial value."""
BaseTrack._count = 0

@ -15,6 +15,9 @@ class BOTrack(STrack):
"""
An extended version of the STrack class for YOLOv8, adding object tracking features.
This class extends the STrack class to include additional functionalities for object tracking, such as feature
smoothing, Kalman filter prediction, and reactivation of tracks.
Attributes:
shared_kalman (KalmanFilterXYWH): A shared Kalman filter for all instances of BOTrack.
smooth_feat (np.ndarray): Smoothed feature vector.
@ -34,16 +37,35 @@ class BOTrack(STrack):
convert_coords(tlwh): Converts tlwh bounding box coordinates to xywh format.
tlwh_to_xywh(tlwh): Convert bounding box to xywh format `(center x, center y, width, height)`.
Usage:
bo_track = BOTrack(tlwh, score, cls, feat)
bo_track.predict()
bo_track.update(new_track, frame_id)
Examples:
Create a BOTrack instance and update its features
>>> bo_track = BOTrack(tlwh=[100, 50, 80, 40], score=0.9, cls=1, feat=np.random.rand(128))
>>> bo_track.predict()
>>> new_track = BOTrack(tlwh=[110, 60, 80, 40], score=0.85, cls=1, feat=np.random.rand(128))
>>> bo_track.update(new_track, frame_id=2)
"""
shared_kalman = KalmanFilterXYWH()
def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
"""Initialize YOLOv8 object with temporal parameters, such as feature history, alpha and current features."""
"""
Initialize a BOTrack object with temporal parameters, such as feature history, alpha, and current features.
Args:
tlwh (np.ndarray): Bounding box coordinates in tlwh format (top left x, top left y, width, height).
score (float): Confidence score of the detection.
cls (int): Class ID of the detected object.
feat (np.ndarray | None): Feature vector associated with the detection.
feat_history (int): Maximum length of the feature history deque.
Examples:
Initialize a BOTrack object with bounding box, score, class ID, and feature vector
>>> tlwh = np.array([100, 50, 80, 120])
>>> score = 0.9
>>> cls = 1
>>> feat = np.random.rand(128)
>>> bo_track = BOTrack(tlwh, score, cls, feat)
"""
super().__init__(tlwh, score, cls)
self.smooth_feat = None
@ -54,7 +76,7 @@ class BOTrack(STrack):
self.alpha = 0.9
def update_features(self, feat):
"""Update features vector and smooth it using exponential moving average."""
"""Update the feature vector and apply exponential moving average smoothing."""
feat /= np.linalg.norm(feat)
self.curr_feat = feat
if self.smooth_feat is None:
@ -65,7 +87,7 @@ class BOTrack(STrack):
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def predict(self):
"""Predicts the mean and covariance using Kalman filter."""
"""Predicts the object's future state using the Kalman filter to update its mean and covariance."""
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[6] = 0
@ -80,14 +102,14 @@ class BOTrack(STrack):
super().re_activate(new_track, frame_id, new_id)
def update(self, new_track, frame_id):
"""Update the YOLOv8 instance with new track and frame ID."""
"""Updates the YOLOv8 instance with new track information and the current frame ID."""
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
super().update(new_track, frame_id)
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y, width, height)`."""
"""Returns the current bounding box position in `(top left x, top left y, width, height)` format."""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
@ -96,7 +118,7 @@ class BOTrack(STrack):
@staticmethod
def multi_predict(stracks):
"""Predicts the mean and covariance of multiple object tracks using shared Kalman filter."""
"""Predicts the mean and covariance for multiple object tracks using a shared Kalman filter."""
if len(stracks) <= 0:
return
multi_mean = np.asarray([st.mean.copy() for st in stracks])
@ -111,12 +133,12 @@ class BOTrack(STrack):
stracks[i].covariance = cov
def convert_coords(self, tlwh):
"""Converts Top-Left-Width-Height bounding box coordinates to X-Y-Width-Height format."""
"""Converts tlwh bounding box coordinates to xywh format."""
return self.tlwh_to_xywh(tlwh)
@staticmethod
def tlwh_to_xywh(tlwh):
"""Convert bounding box to format `(center x, center y, width, height)`."""
"""Convert bounding box from tlwh (top-left-width-height) to xywh (center-x-center-y-width-height) format."""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
return ret
@ -129,9 +151,9 @@ class BOTSORT(BYTETracker):
Attributes:
proximity_thresh (float): Threshold for spatial proximity (IoU) between tracks and detections.
appearance_thresh (float): Threshold for appearance similarity (ReID embeddings) between tracks and detections.
encoder (object): Object to handle ReID embeddings, set to None if ReID is not enabled.
encoder (Any): Object to handle ReID embeddings, set to None if ReID is not enabled.
gmc (GMC): An instance of the GMC algorithm for data association.
args (object): Parsed command-line arguments containing tracking parameters.
args (Any): Parsed command-line arguments containing tracking parameters.
Methods:
get_kalmanfilter(): Returns an instance of KalmanFilterXYWH for object tracking.
@ -139,17 +161,29 @@ class BOTSORT(BYTETracker):
get_dists(tracks, detections): Get distances between tracks and detections using IoU and (optionally) ReID.
multi_predict(tracks): Predict and track multiple objects with YOLOv8 model.
Usage:
bot_sort = BOTSORT(args, frame_rate)
bot_sort.init_track(dets, scores, cls, img)
bot_sort.multi_predict(tracks)
Examples:
Initialize BOTSORT and process detections
>>> bot_sort = BOTSORT(args, frame_rate=30)
>>> bot_sort.init_track(dets, scores, cls, img)
>>> bot_sort.multi_predict(tracks)
Note:
The class is designed to work with the YOLOv8 object detection model and supports ReID only if enabled via args.
"""
def __init__(self, args, frame_rate=30):
"""Initialize YOLOv8 object with ReID module and GMC algorithm."""
"""
Initialize YOLOv8 object with ReID module and GMC algorithm.
Args:
args (object): Parsed command-line arguments containing tracking parameters.
frame_rate (int): Frame rate of the video being processed.
Examples:
Initialize BOTSORT with command-line arguments and a specified frame rate:
>>> args = parse_args()
>>> bot_sort = BOTSORT(args, frame_rate=30)
"""
super().__init__(args, frame_rate)
# ReID module
self.proximity_thresh = args.proximity_thresh
@ -161,11 +195,11 @@ class BOTSORT(BYTETracker):
self.gmc = GMC(method=args.gmc_method)
def get_kalmanfilter(self):
"""Returns an instance of KalmanFilterXYWH for object tracking."""
"""Returns an instance of KalmanFilterXYWH for predicting and updating object states in the tracking process."""
return KalmanFilterXYWH()
def init_track(self, dets, scores, cls, img=None):
"""Initialize track with detections, scores, and classes."""
"""Initialize object tracks using detection bounding boxes, scores, class labels, and optional ReID features."""
if len(dets) == 0:
return []
if self.args.with_reid and self.encoder is not None:
@ -175,7 +209,7 @@ class BOTSORT(BYTETracker):
return [BOTrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] # detections
def get_dists(self, tracks, detections):
"""Get distances between tracks and detections using IoU and (optionally) ReID embeddings."""
"""Calculates distances between tracks and detections using IoU and optionally ReID embeddings."""
dists = matching.iou_distance(tracks, detections)
dists_mask = dists > self.proximity_thresh
@ -190,10 +224,10 @@ class BOTSORT(BYTETracker):
return dists
def multi_predict(self, tracks):
"""Predict and track multiple objects with YOLOv8 model."""
"""Predicts the mean and covariance of multiple object tracks using a shared Kalman filter."""
BOTrack.multi_predict(tracks)
def reset(self):
"""Reset tracker."""
"""Resets the BOTSORT tracker to its initial state, clearing all tracked objects and internal states."""
super().reset()
self.gmc.reset_params()

@ -25,7 +25,7 @@ class STrack(BaseTrack):
is_activated (bool): Boolean flag indicating if the track has been activated.
score (float): Confidence score of the track.
tracklet_len (int): Length of the tracklet.
cls (any): Class label for the object.
cls (Any): Class label for the object.
idx (int): Index or identifier for the object.
frame_id (int): Current frame ID.
start_frame (int): Frame where the object was first detected.
@ -39,12 +39,31 @@ class STrack(BaseTrack):
update(new_track, frame_id): Update the state of a matched track.
convert_coords(tlwh): Convert bounding box to x-y-aspect-height format.
tlwh_to_xyah(tlwh): Convert tlwh bounding box to xyah format.
Examples:
Initialize and activate a new track
>>> track = STrack(xywh=[100, 200, 50, 80, 0], score=0.9, cls='person')
>>> track.activate(kalman_filter=KalmanFilterXYAH(), frame_id=1)
"""
shared_kalman = KalmanFilterXYAH()
def __init__(self, xywh, score, cls):
"""Initialize new STrack instance."""
"""
Initialize a new STrack instance.
Args:
xywh (List[float]): Bounding box coordinates and dimensions in the format (x, y, w, h, [a], idx), where
(x, y) is the center, (w, h) are width and height, [a] is optional aspect ratio, and idx is the id.
score (float): Confidence score of the detection.
cls (Any): Class label for the detected object.
Examples:
>>> xywh = [100.0, 150.0, 50.0, 75.0, 1]
>>> score = 0.9
>>> cls = 'person'
>>> track = STrack(xywh, score, cls)
"""
super().__init__()
# xywh+idx or xywha+idx
assert len(xywh) in {5, 6}, f"expected 5 or 6 values but got {len(xywh)}"
@ -60,7 +79,7 @@ class STrack(BaseTrack):
self.angle = xywh[4] if len(xywh) == 6 else None
def predict(self):
"""Predicts mean and covariance using Kalman filter."""
"""Predicts the next state (mean and covariance) of the object using the Kalman filter."""
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[7] = 0
@ -68,7 +87,7 @@ class STrack(BaseTrack):
@staticmethod
def multi_predict(stracks):
"""Perform multi-object predictive tracking using Kalman filter for given stracks."""
"""Perform multi-object predictive tracking using Kalman filter for the provided list of STrack instances."""
if len(stracks) <= 0:
return
multi_mean = np.asarray([st.mean.copy() for st in stracks])
@ -83,7 +102,7 @@ class STrack(BaseTrack):
@staticmethod
def multi_gmc(stracks, H=np.eye(2, 3)):
"""Update state tracks positions and covariances using a homography matrix."""
"""Update state tracks positions and covariances using a homography matrix for multiple tracks."""
if len(stracks) > 0:
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
@ -101,7 +120,7 @@ class STrack(BaseTrack):
stracks[i].covariance = cov
def activate(self, kalman_filter, frame_id):
"""Start a new tracklet."""
"""Activate a new tracklet using the provided Kalman filter and initialize its state and covariance."""
self.kalman_filter = kalman_filter
self.track_id = self.next_id()
self.mean, self.covariance = self.kalman_filter.initiate(self.convert_coords(self._tlwh))
@ -114,7 +133,7 @@ class STrack(BaseTrack):
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
"""Reactivates a previously lost track with a new detection."""
"""Reactivates a previously lost track using new detection data and updates its state and attributes."""
self.mean, self.covariance = self.kalman_filter.update(
self.mean, self.covariance, self.convert_coords(new_track.tlwh)
)
@ -136,6 +155,12 @@ class STrack(BaseTrack):
Args:
new_track (STrack): The new track containing updated information.
frame_id (int): The ID of the current frame.
Examples:
Update the state of a track with new detection information
>>> track = STrack([100, 200, 50, 80, 0.9, 1])
>>> new_track = STrack([105, 205, 55, 85, 0.95, 1])
>>> track.update(new_track, 2)
"""
self.frame_id = frame_id
self.tracklet_len += 1
@ -158,7 +183,7 @@ class STrack(BaseTrack):
@property
def tlwh(self):
"""Get current position in bounding box format (top left x, top left y, width, height)."""
"""Returns the bounding box in top-left-width-height format from the current state estimate."""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
@ -168,16 +193,14 @@ class STrack(BaseTrack):
@property
def xyxy(self):
"""Convert bounding box to format (min x, min y, max x, max y), i.e., (top left, bottom right)."""
"""Converts bounding box from (top left x, top left y, width, height) to (min x, min y, max x, max y) format."""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format (center x, center y, aspect ratio, height), where the aspect ratio is width /
height.
"""
"""Convert bounding box from tlwh format to center-x-center-y-aspect-height (xyah) format."""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
@ -185,14 +208,14 @@ class STrack(BaseTrack):
@property
def xywh(self):
"""Get current position in bounding box format (center x, center y, width, height)."""
"""Returns the current position of the bounding box in (center x, center y, width, height) format."""
ret = np.asarray(self.tlwh).copy()
ret[:2] += ret[2:] / 2
return ret
@property
def xywha(self):
"""Get current position in bounding box format (center x, center y, width, height, angle)."""
"""Returns position in (center x, center y, width, height, angle) format, warning if angle is missing."""
if self.angle is None:
LOGGER.warning("WARNING ⚠ `angle` attr not found, returning `xywh` instead.")
return self.xywh
@ -200,12 +223,12 @@ class STrack(BaseTrack):
@property
def result(self):
"""Get current tracking results."""
"""Returns the current tracking results in the appropriate bounding box format."""
coords = self.xyxy if self.angle is None else self.xywha
return coords.tolist() + [self.track_id, self.score, self.cls, self.idx]
def __repr__(self):
"""Return a string representation of the BYTETracker object with start and end frames and track ID."""
"""Returns a string representation of the STrack object including start frame, end frame, and track ID."""
return f"OT_{self.track_id}_({self.start_frame}-{self.end_frame})"
@ -213,18 +236,18 @@ class BYTETracker:
"""
BYTETracker: A tracking algorithm built on top of YOLOv8 for object detection and tracking.
The class is responsible for initializing, updating, and managing the tracks for detected objects in a video
sequence. It maintains the state of tracked, lost, and removed tracks over frames, utilizes Kalman filtering for
predicting the new object locations, and performs data association.
Responsible for initializing, updating, and managing the tracks for detected objects in a video sequence.
It maintains the state of tracked, lost, and removed tracks over frames, utilizes Kalman filtering for predicting
the new object locations, and performs data association.
Attributes:
tracked_stracks (list[STrack]): List of successfully activated tracks.
lost_stracks (list[STrack]): List of lost tracks.
removed_stracks (list[STrack]): List of removed tracks.
tracked_stracks (List[STrack]): List of successfully activated tracks.
lost_stracks (List[STrack]): List of lost tracks.
removed_stracks (List[STrack]): List of removed tracks.
frame_id (int): The current frame ID.
args (namespace): Command-line arguments.
args (Namespace): Command-line arguments.
max_time_lost (int): The maximum frames for a track to be considered as 'lost'.
kalman_filter (object): Kalman Filter object.
kalman_filter (KalmanFilterXYAH): Kalman Filter object.
Methods:
update(results, img=None): Updates object tracker with new detections.
@ -236,10 +259,27 @@ class BYTETracker:
joint_stracks(tlista, tlistb): Combines two lists of stracks.
sub_stracks(tlista, tlistb): Filters out the stracks present in the second list from the first list.
remove_duplicate_stracks(stracksa, stracksb): Removes duplicate stracks based on IoU.
Examples:
Initialize BYTETracker and update with detection results
>>> tracker = BYTETracker(args, frame_rate=30)
>>> results = yolo_model.detect(image)
>>> tracked_objects = tracker.update(results)
"""
def __init__(self, args, frame_rate=30):
"""Initialize a YOLOv8 object to track objects with given arguments and frame rate."""
"""
Initialize a BYTETracker instance for object tracking.
Args:
args (Namespace): Command-line arguments containing tracking parameters.
frame_rate (int): Frame rate of the video sequence.
Examples:
Initialize BYTETracker with command-line arguments and a frame rate of 30
>>> args = Namespace(track_buffer=30)
>>> tracker = BYTETracker(args, frame_rate=30)
"""
self.tracked_stracks = [] # type: list[STrack]
self.lost_stracks = [] # type: list[STrack]
self.removed_stracks = [] # type: list[STrack]
@ -251,7 +291,7 @@ class BYTETracker:
self.reset_id()
def update(self, results, img=None):
"""Updates object tracker with new detections and returns tracked object bounding boxes."""
"""Updates the tracker with new detections and returns the current list of tracked objects."""
self.frame_id += 1
activated_stracks = []
refind_stracks = []
@ -365,31 +405,31 @@ class BYTETracker:
return np.asarray([x.result for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
def get_kalmanfilter(self):
"""Returns a Kalman filter object for tracking bounding boxes."""
"""Returns a Kalman filter object for tracking bounding boxes using KalmanFilterXYAH."""
return KalmanFilterXYAH()
def init_track(self, dets, scores, cls, img=None):
"""Initialize object tracking with detections and scores using STrack algorithm."""
"""Initializes object tracking with given detections, scores, and class labels using the STrack algorithm."""
return [STrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] if len(dets) else [] # detections
def get_dists(self, tracks, detections):
"""Calculates the distance between tracks and detections using IoU and fuses scores."""
"""Calculates the distance between tracks and detections using IoU and optionally fuses scores."""
dists = matching.iou_distance(tracks, detections)
if self.args.fuse_score:
dists = matching.fuse_score(dists, detections)
return dists
def multi_predict(self, tracks):
"""Returns the predicted tracks using the YOLOv8 network."""
"""Predict the next states for multiple tracks using Kalman filter."""
STrack.multi_predict(tracks)
@staticmethod
def reset_id():
"""Resets the ID counter of STrack."""
"""Resets the ID counter for STrack instances to ensure unique track IDs across tracking sessions."""
STrack.reset_id()
def reset(self):
"""Reset tracker."""
"""Resets the tracker by clearing all tracked, lost, and removed tracks and reinitializing the Kalman filter."""
self.tracked_stracks = [] # type: list[STrack]
self.lost_stracks = [] # type: list[STrack]
self.removed_stracks = [] # type: list[STrack]
@ -399,7 +439,7 @@ class BYTETracker:
@staticmethod
def joint_stracks(tlista, tlistb):
"""Combine two lists of stracks into a single one."""
"""Combines two lists of STrack objects into a single list, ensuring no duplicates based on track IDs."""
exists = {}
res = []
for t in tlista:
@ -414,20 +454,13 @@ class BYTETracker:
@staticmethod
def sub_stracks(tlista, tlistb):
"""DEPRECATED CODE in https://github.com/ultralytics/ultralytics/pull/1890/
stracks = {t.track_id: t for t in tlista}
for t in tlistb:
tid = t.track_id
if stracks.get(tid, 0):
del stracks[tid]
return list(stracks.values())
"""
"""Filters out the stracks present in the second list from the first list."""
track_ids_b = {t.track_id for t in tlistb}
return [t for t in tlista if t.track_id not in track_ids_b]
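# Note: the set of IDs gives O(1) membership tests, so this filter runs in
# O(len(tlista) + len(tlistb)), e.g. sub_stracks([t1, t2, t3], [t2]) -> [t1, t3].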
@staticmethod
def remove_duplicate_stracks(stracksa, stracksb):
"""Remove duplicate stracks with non-maximum IoU distance."""
"""Removes duplicate stracks from two lists based on Intersection over Union (IoU) distance."""
pdist = matching.iou_distance(stracksa, stracksb)
pairs = np.where(pdist < 0.15)
dupa, dupb = [], []
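The hunk ends before the overlapping pairs are resolved; conceptually, each IoU-overlapping pair keeps the track with the longer history. A minimal sketch of that resolution step, assuming the frame_id and start_frame attributes used elsewhere in this file:

def resolve_pairs(stracksa, stracksb, pairs):
    """Sketch: for each overlapping pair, keep the track with the longer history."""
    dupa, dupb = [], []
    for p, q in zip(*pairs):
        age_a = stracksa[p].frame_id - stracksa[p].start_frame
        age_b = stracksb[q].frame_id - stracksb[q].start_frame
        if age_a > age_b:
            dupb.append(q)  # a is older, drop the duplicate from list b
        else:
            dupa.append(p)  # b is older (or tied), drop the duplicate from list a
    resa = [t for i, t in enumerate(stracksa) if i not in dupa]
    resb = [t for i, t in enumerate(stracksb) if i not in dupb]
    return resa, resb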

@ -21,10 +21,15 @@ def on_predict_start(predictor: object, persist: bool = False) -> None:
Args:
predictor (object): The predictor object to initialize trackers for.
persist (bool, optional): Whether to persist the trackers if they already exist. Defaults to False.
persist (bool): Whether to persist the trackers if they already exist.
Raises:
AssertionError: If the tracker_type is not 'bytetrack' or 'botsort'.
Examples:
Initialize trackers for a predictor object:
>>> predictor = SomePredictorClass()
>>> on_predict_start(predictor, persist=True)
"""
if hasattr(predictor, "trackers") and persist:
return
@ -51,7 +56,12 @@ def on_predict_postprocess_end(predictor: object, persist: bool = False) -> None
Args:
predictor (object): The predictor object containing the predictions.
persist (bool, optional): Whether to persist the trackers if they already exist. Defaults to False.
persist (bool): Whether to persist the trackers if they already exist.
Examples:
Postprocess predictions and update with tracking
>>> predictor = YourPredictorClass()
>>> on_predict_postprocess_end(predictor, persist=True)
"""
path, im0s = predictor.batch[:2]
@ -84,6 +94,11 @@ def register_tracker(model: object, persist: bool) -> None:
Args:
model (object): The model object to register tracking callbacks for.
persist (bool): Whether to persist the trackers if they already exist.
Examples:
Register tracking callbacks to a YOLO model
>>> model = YOLOModel()
>>> register_tracker(model, persist=True)
"""
model.add_callback("on_predict_start", partial(on_predict_start, persist=persist))
model.add_callback("on_predict_postprocess_end", partial(on_predict_postprocess_end, persist=persist))

@ -19,27 +19,39 @@ class GMC:
method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
downscale (int): Factor by which to downscale the frames for processing.
prevFrame (np.ndarray): Stores the previous frame for tracking.
prevKeyPoints (list): Stores the keypoints from the previous frame.
prevKeyPoints (List): Stores the keypoints from the previous frame.
prevDescriptors (np.ndarray): Stores the descriptors from the previous frame.
initializedFirstFrame (bool): Flag to indicate if the first frame has been processed.
Methods:
__init__(self, method='sparseOptFlow', downscale=2): Initializes a GMC object with the specified method
and downscale factor.
apply(self, raw_frame, detections=None): Applies the chosen method to a raw frame and optionally uses
provided detections.
applyEcc(self, raw_frame, detections=None): Applies the ECC algorithm to a raw frame.
applyFeatures(self, raw_frame, detections=None): Applies feature-based methods like ORB or SIFT to a raw frame.
applySparseOptFlow(self, raw_frame, detections=None): Applies the Sparse Optical Flow method to a raw frame.
__init__: Initializes a GMC object with the specified method and downscale factor.
apply: Applies the chosen method to a raw frame and optionally uses provided detections.
applyEcc: Applies the ECC algorithm to a raw frame.
applyFeatures: Applies feature-based methods like ORB or SIFT to a raw frame.
applySparseOptFlow: Applies the Sparse Optical Flow method to a raw frame.
reset_params: Resets the internal parameters of the GMC object.
Examples:
Create a GMC object and apply it to a frame
>>> gmc = GMC(method='sparseOptFlow', downscale=2)
>>> frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
>>> warp_matrix = gmc.apply(frame)
>>> print(warp_matrix.shape)
(2, 3)
"""
def __init__(self, method: str = "sparseOptFlow", downscale: int = 2) -> None:
"""
Initialize a video tracker with specified parameters.
Initialize a Generalized Motion Compensation (GMC) object with tracking method and downscale factor.
Args:
method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
downscale (int): Downscale factor for processing frames.
Examples:
Initialize a GMC object with the 'sparseOptFlow' method and a downscale factor of 2
>>> gmc = GMC(method='sparseOptFlow', downscale=2)
"""
super().__init__()
@ -79,20 +91,21 @@ class GMC:
def apply(self, raw_frame: np.array, detections: list = None) -> np.array:
"""
Apply object detection on a raw frame using specified method.
Apply object detection on a raw frame using the specified method.
Args:
raw_frame (np.ndarray): The raw frame to be processed.
detections (list): List of detections to be used in the processing.
raw_frame (np.ndarray): The raw frame to be processed, with shape (H, W, C).
detections (List | None): List of detections to be used in the processing.
Returns:
(np.ndarray): Processed frame.
(np.ndarray): The estimated 2x3 transformation matrix for motion compensation.
Examples:
>>> gmc = GMC()
>>> gmc.apply(np.array([[1, 2, 3], [4, 5, 6]]))
array([[1, 2, 3],
[4, 5, 6]])
>>> gmc = GMC(method='sparseOptFlow')
>>> raw_frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
>>> transform = gmc.apply(raw_frame)
>>> print(transform.shape)
(2, 3)
"""
if self.method in {"orb", "sift"}:
return self.applyFeatures(raw_frame, detections)
@ -105,19 +118,20 @@ class GMC:
def applyEcc(self, raw_frame: np.array) -> np.array:
"""
Apply ECC algorithm to a raw frame.
Apply the ECC (Enhanced Correlation Coefficient) algorithm to a raw frame for motion compensation.
Args:
raw_frame (np.ndarray): The raw frame to be processed.
raw_frame (np.ndarray): The raw frame to be processed, with shape (H, W, C).
Returns:
(np.ndarray): Processed frame.
(np.ndarray): The estimated 2x3 ECC transformation matrix.
Examples:
>>> gmc = GMC()
>>> gmc.applyEcc(np.array([[1, 2, 3], [4, 5, 6]]))
array([[1, 2, 3],
[4, 5, 6]])
>>> gmc = GMC(method='ecc')
>>> raw_frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
>>> processed_frame = gmc.applyEcc(raw_frame)
>>> print(processed_frame)
[[1. 0. 0.]
[0. 1. 0.]]
"""
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
@ -127,8 +141,6 @@ class GMC:
if self.downscale > 1.0:
frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
width = width // self.downscale
height = height // self.downscale
# Handle first frame
if not self.initializedFirstFrame:
@ -154,17 +166,18 @@ class GMC:
Apply feature-based methods like ORB or SIFT to a raw frame.
Args:
raw_frame (np.ndarray): The raw frame to be processed.
detections (list): List of detections to be used in the processing.
raw_frame (np.ndarray): The raw frame to be processed, with shape (H, W, C).
detections (List | None): List of detections to be used in the processing.
Returns:
(np.ndarray): Processed frame.
Examples:
>>> gmc = GMC()
>>> gmc.applyFeatures(np.array([[1, 2, 3], [4, 5, 6]]))
array([[1, 2, 3],
[4, 5, 6]])
>>> gmc = GMC(method='orb')
>>> raw_frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
>>> processed_frame = gmc.applyFeatures(raw_frame)
>>> print(processed_frame.shape)
(2, 3)
"""
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
@ -296,16 +309,17 @@ class GMC:
Apply Sparse Optical Flow method to a raw frame.
Args:
raw_frame (np.ndarray): The raw frame to be processed.
raw_frame (np.ndarray): The raw frame to be processed, with shape (H, W, C).
Returns:
(np.ndarray): Processed frame.
(np.ndarray): The estimated transformation matrix with shape (2, 3).
Examples:
>>> gmc = GMC()
>>> gmc.applySparseOptFlow(np.array([[1, 2, 3], [4, 5, 6]]))
array([[1, 2, 3],
[4, 5, 6]])
>>> gmc = GMC(method='sparseOptFlow')
>>> result = gmc.applySparseOptFlow(np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8))
>>> print(result)
[[1. 0. 0.]
[0. 1. 0.]]
"""
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
@ -356,7 +370,7 @@ class GMC:
return H
def reset_params(self) -> None:
"""Reset parameters."""
"""Reset the internal parameters including previous frame, keypoints, and descriptors."""
self.prevFrame = None
self.prevKeyPoints = None
self.prevDescriptors = None

@ -6,17 +6,49 @@ import scipy.linalg
class KalmanFilterXYAH:
"""
For bytetrack. A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y), aspect
ratio a, height h, and their respective velocities.
Object motion follows a constant velocity model. The bounding box location (x, y, a, h) is taken as direct
observation of the state space (linear observation model).
A KalmanFilterXYAH class for tracking bounding boxes in image space using a Kalman filter.
Implements a simple Kalman filter for tracking bounding boxes in image space. The 8-dimensional state space
(x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y), aspect ratio a, height h, and their
respective velocities. Object motion follows a constant velocity model, and bounding box location (x, y, a, h) is
taken as a direct observation of the state space (linear observation model).
Attributes:
_motion_mat (np.ndarray): The motion matrix for the Kalman filter.
_update_mat (np.ndarray): The update matrix for the Kalman filter.
_std_weight_position (float): Standard deviation weight for position.
_std_weight_velocity (float): Standard deviation weight for velocity.
Methods:
initiate: Creates a track from an unassociated measurement.
predict: Runs the Kalman filter prediction step.
project: Projects the state distribution to measurement space.
multi_predict: Runs the Kalman filter prediction step (vectorized version).
update: Runs the Kalman filter correction step.
gating_distance: Computes the gating distance between state distribution and measurements.
Examples:
Initialize the Kalman filter and create a track from a measurement
>>> kf = KalmanFilterXYAH()
>>> measurement = np.array([100, 200, 1.5, 50])
>>> mean, covariance = kf.initiate(measurement)
>>> print(mean)
>>> print(covariance)
"""
def __init__(self):
"""Initialize Kalman filter model matrices with motion and observation uncertainty weights."""
"""
Initialize Kalman filter model matrices with motion and observation uncertainty weights.
The Kalman filter is initialized with an 8-dimensional state space (x, y, a, h, vx, vy, va, vh), where (x, y)
represents the bounding box center position, 'a' is the aspect ratio, 'h' is the height, and their respective
velocities are (vx, vy, va, vh). The filter uses a constant velocity model for object motion and a linear
observation model for bounding box location.
Examples:
Initialize a Kalman filter for tracking:
>>> kf = KalmanFilterXYAH()
"""
ndim, dt = 4, 1.0
# Create Kalman filter model matrices
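The hunk truncates before the matrices are filled in; for the constant-velocity model described in the docstring, the standard construction looks roughly like this (a sketch consistent with that description, not necessarily the exact source):

import numpy as np

ndim, dt = 4, 1.0
motion_mat = np.eye(2 * ndim)        # F: state transition for (x, y, a, h, vx, vy, va, vh)
for i in range(ndim):
    motion_mat[i, ndim + i] = dt     # position += dt * velocity
update_mat = np.eye(ndim, 2 * ndim)  # H: observe (x, y, a, h) directly, velocities stay hidden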
@ -32,15 +64,20 @@ class KalmanFilterXYAH:
def initiate(self, measurement: np.ndarray) -> tuple:
"""
Create track from unassociated measurement.
Create a track from an unassociated measurement.
Args:
measurement (ndarray): Bounding box coordinates (x, y, a, h) with center position (x, y), aspect ratio a,
and height h.
Returns:
(tuple[ndarray, ndarray]): Returns the mean vector (8 dimensional) and covariance matrix (8x8 dimensional)
(tuple[ndarray, ndarray]): Returns the mean vector (8-dimensional) and covariance matrix (8x8 dimensional)
of the new track. Unobserved velocities are initialized to 0 mean.
Examples:
>>> kf = KalmanFilterXYAH()
>>> measurement = np.array([100, 50, 1.5, 200])
>>> mean, covariance = kf.initiate(measurement)
"""
mean_pos = measurement
mean_vel = np.zeros_like(mean_pos)
@ -64,12 +101,18 @@ class KalmanFilterXYAH:
Run Kalman filter prediction step.
Args:
mean (ndarray): The 8 dimensional mean vector of the object state at the previous time step.
covariance (ndarray): The 8x8 dimensional covariance matrix of the object state at the previous time step.
mean (ndarray): The 8-dimensional mean vector of the object state at the previous time step.
covariance (ndarray): The 8x8-dimensional covariance matrix of the object state at the previous time step.
Returns:
(tuple[ndarray, ndarray]): Returns the mean vector and covariance matrix of the predicted state. Unobserved
velocities are initialized to 0 mean.
Examples:
>>> kf = KalmanFilterXYAH()
>>> mean = np.array([0, 0, 1, 1, 0, 0, 0, 0])
>>> covariance = np.eye(8)
>>> predicted_mean, predicted_covariance = kf.predict(mean, covariance)
"""
std_pos = [
self._std_weight_position * mean[3],
@ -100,6 +143,12 @@ class KalmanFilterXYAH:
Returns:
(tuple[ndarray, ndarray]): Returns the projected mean and covariance matrix of the given state estimate.
Examples:
>>> kf = KalmanFilterXYAH()
>>> mean = np.array([0, 0, 1, 1, 0, 0, 0, 0])
>>> covariance = np.eye(8)
>>> projected_mean, projected_covariance = kf.project(mean, covariance)
"""
std = [
self._std_weight_position * mean[3],
@ -115,15 +164,21 @@ class KalmanFilterXYAH:
def multi_predict(self, mean: np.ndarray, covariance: np.ndarray) -> tuple:
"""
Run Kalman filter prediction step (Vectorized version).
Run Kalman filter prediction step for multiple object states (Vectorized version).
Args:
mean (ndarray): The Nx8 dimensional mean matrix of the object states at the previous time step.
covariance (ndarray): The Nx8x8 covariance matrix of the object states at the previous time step.
Returns:
(tuple[ndarray, ndarray]): Returns the mean vector and covariance matrix of the predicted state. Unobserved
velocities are initialized to 0 mean.
(tuple[ndarray, ndarray]): Returns the mean matrix and covariance matrix of the predicted states.
The mean matrix has shape (N, 8) and the covariance matrix has shape (N, 8, 8). Unobserved velocities
are initialized to 0 mean.
Examples:
>>> kf = KalmanFilterXYAH()
>>> mean = np.random.rand(10, 8)  # 10 object states
>>> covariance = np.random.rand(10, 8, 8)  # Covariance matrices for 10 object states
>>> predicted_mean, predicted_covariance = kf.multi_predict(mean, covariance)
"""
std_pos = [
self._std_weight_position * mean[:, 3],
@ -160,6 +215,13 @@ class KalmanFilterXYAH:
Returns:
(tuple[ndarray, ndarray]): Returns the measurement-corrected state distribution.
Examples:
>>> kf = KalmanFilterXYAH()
>>> mean = np.array([0, 0, 1, 1, 0, 0, 0, 0])
>>> covariance = np.eye(8)
>>> measurement = np.array([1, 1, 1, 1])
>>> new_mean, new_covariance = kf.update(mean, covariance, measurement)
"""
projected_mean, projected_cov = self.project(mean, covariance)
@ -182,23 +244,31 @@ class KalmanFilterXYAH:
metric: str = "maha",
) -> np.ndarray:
"""
Compute gating distance between state distribution and measurements. A suitable distance threshold can be
obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of freedom,
otherwise 2.
Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If `only_position` is False, the chi-square
distribution has 4 degrees of freedom, otherwise 2.
Args:
mean (ndarray): Mean vector over the state distribution (8 dimensional).
covariance (ndarray): Covariance of the state distribution (8x8 dimensional).
measurements (ndarray): An Nx4 matrix of N measurements, each in format (x, y, a, h) where (x, y)
is the bounding box center position, a the aspect ratio, and h the height.
only_position (bool, optional): If True, distance computation is done with respect to the bounding box
center position only. Defaults to False.
metric (str, optional): The metric to use for calculating the distance. Options are 'gaussian' for the
squared Euclidean distance and 'maha' for the squared Mahalanobis distance. Defaults to 'maha'.
measurements (ndarray): An (N, 4) matrix of N measurements, each in format (x, y, a, h) where (x, y) is the
bounding box center position, a the aspect ratio, and h the height.
only_position (bool): If True, distance computation is done with respect to box center position only.
metric (str): The metric to use for calculating the distance. Options are 'gaussian' for the squared
Euclidean distance and 'maha' for the squared Mahalanobis distance.
Returns:
(np.ndarray): Returns an array of length N, where the i-th element contains the squared distance between
(mean, covariance) and `measurements[i]`.
Examples:
Compute gating distance using Mahalanobis metric:
>>> kf = KalmanFilterXYAH()
>>> mean = np.array([0, 0, 1, 1, 0, 0, 0, 0])
>>> covariance = np.eye(8)
>>> measurements = np.array([[1, 1, 1, 1], [2, 2, 1, 1]])
>>> distances = kf.gating_distance(mean, covariance, measurements, only_position=False, metric='maha')
"""
mean, covariance = self.project(mean, covariance)
if only_position:
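For the 'maha' branch referenced above, the squared Mahalanobis distance is typically computed through a Cholesky factorization rather than an explicit matrix inverse. A hedged sketch of that computation:

import numpy as np
import scipy.linalg

def mahalanobis_sq(mean, covariance, measurements):
    """Sketch: squared Mahalanobis distance from one Gaussian to N measurements."""
    d = measurements - mean                                # (N, 4) residuals
    L = np.linalg.cholesky(covariance)                     # covariance = L @ L.T
    z = scipy.linalg.solve_triangular(L, d.T, lower=True)  # solve L z = d for each residual
    return np.sum(z * z, axis=0)                           # (N,) squared distances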
@ -218,13 +288,33 @@ class KalmanFilterXYAH:
class KalmanFilterXYWH(KalmanFilterXYAH):
"""
For BoT-SORT. A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y), width
w, height h, and their respective velocities.
A KalmanFilterXYWH class for tracking bounding boxes in image space using a Kalman filter.
Object motion follows a constant velocity model. The bounding box location (x, y, w, h) is taken as direct
Implements a Kalman filter for tracking bounding boxes with state space (x, y, w, h, vx, vy, vw, vh), where
(x, y) is the center position, w is the width, h is the height, and vx, vy, vw, vh are their respective velocities.
The object motion follows a constant velocity model, and the bounding box location (x, y, w, h) is taken as a direct
observation of the state space (linear observation model).
Attributes:
_motion_mat (np.ndarray): The motion matrix for the Kalman filter.
_update_mat (np.ndarray): The update matrix for the Kalman filter.
_std_weight_position (float): Standard deviation weight for position.
_std_weight_velocity (float): Standard deviation weight for velocity.
Methods:
initiate: Creates a track from an unassociated measurement.
predict: Runs the Kalman filter prediction step.
project: Projects the state distribution to measurement space.
multi_predict: Runs the Kalman filter prediction step in a vectorized manner.
update: Runs the Kalman filter correction step.
Examples:
Create a Kalman filter and initialize a track
>>> kf = KalmanFilterXYWH()
>>> measurement = np.array([100, 50, 20, 40])
>>> mean, covariance = kf.initiate(measurement)
>>> print(mean)
>>> print(covariance)
"""
def initiate(self, measurement: np.ndarray) -> tuple:
@ -237,6 +327,22 @@ class KalmanFilterXYWH(KalmanFilterXYAH):
Returns:
(tuple[ndarray, ndarray]): Returns the mean vector (8 dimensional) and covariance matrix (8x8 dimensional)
of the new track. Unobserved velocities are initialized to 0 mean.
Examples:
>>> kf = KalmanFilterXYWH()
>>> measurement = np.array([100, 50, 20, 40])
>>> mean, covariance = kf.initiate(measurement)
>>> print(mean.shape, covariance.shape)
(8,) (8, 8)
"""
mean_pos = measurement
mean_vel = np.zeros_like(mean_pos)
@ -260,12 +366,18 @@ class KalmanFilterXYWH(KalmanFilterXYAH):
Run Kalman filter prediction step.
Args:
mean (ndarray): The 8 dimensional mean vector of the object state at the previous time step.
covariance (ndarray): The 8x8 dimensional covariance matrix of the object state at the previous time step.
mean (ndarray): The 8-dimensional mean vector of the object state at the previous time step.
covariance (ndarray): The 8x8-dimensional covariance matrix of the object state at the previous time step.
Returns:
(tuple[ndarray, ndarray]): Returns the mean vector and covariance matrix of the predicted state. Unobserved
velocities are initialized to 0 mean.
Examples:
>>> kf = KalmanFilterXYWH()
>>> mean = np.array([0, 0, 1, 1, 0, 0, 0, 0])
>>> covariance = np.eye(8)
>>> predicted_mean, predicted_covariance = kf.predict(mean, covariance)
"""
std_pos = [
self._std_weight_position * mean[2],
@ -296,6 +408,12 @@ class KalmanFilterXYWH(KalmanFilterXYAH):
Returns:
(tuple[ndarray, ndarray]): Returns the projected mean and covariance matrix of the given state estimate.
Examples:
>>> kf = KalmanFilterXYWH()
>>> mean = np.array([0, 0, 1, 1, 0, 0, 0, 0])
>>> covariance = np.eye(8)
>>> projected_mean, projected_cov = kf.project(mean, covariance)
"""
std = [
self._std_weight_position * mean[2],
@ -320,6 +438,12 @@ class KalmanFilterXYWH(KalmanFilterXYAH):
Returns:
(tuple[ndarray, ndarray]): Returns the mean vector and covariance matrix of the predicted state. Unobserved
velocities are initialized to 0 mean.
Examples:
>>> mean = np.random.rand(5, 8) # 5 objects with 8-dimensional state vectors
>>> covariance = np.random.rand(5, 8, 8) # 5 objects with 8x8 covariance matrices
>>> kf = KalmanFilterXYWH()
>>> predicted_mean, predicted_covariance = kf.multi_predict(mean, covariance)
"""
std_pos = [
self._std_weight_position * mean[:, 2],
@ -356,5 +480,12 @@ class KalmanFilterXYWH(KalmanFilterXYAH):
Returns:
(tuple[ndarray, ndarray]): Returns the measurement-corrected state distribution.
Examples:
>>> kf = KalmanFilterXYWH()
>>> mean = np.array([0, 0, 1, 1, 0, 0, 0, 0])
>>> covariance = np.eye(8)
>>> measurement = np.array([0.5, 0.5, 1.2, 1.2])
>>> new_mean, new_covariance = kf.update(mean, covariance, measurement)
"""
return super().update(mean, covariance, measurement)

@ -19,18 +19,23 @@ except (ImportError, AssertionError, AttributeError):
def linear_assignment(cost_matrix: np.ndarray, thresh: float, use_lap: bool = True) -> tuple:
"""
Perform linear assignment using scipy or lap.lapjv.
Perform linear assignment using either the scipy or lap.lapjv method.
Args:
cost_matrix (np.ndarray): The matrix containing cost values for assignments.
cost_matrix (np.ndarray): The matrix containing cost values for assignments, with shape (N, M).
thresh (float): Threshold for considering an assignment valid.
use_lap (bool, optional): Whether to use lap.lapjv. Defaults to True.
use_lap (bool): Use lap.lapjv for the assignment. If False, scipy.optimize.linear_sum_assignment is used.
Returns:
Tuple with:
- matched indices
- unmatched indices from 'a'
- unmatched indices from 'b'
(tuple): A tuple containing:
- matched_indices (np.ndarray): Array of matched indices of shape (K, 2), where K is the number of matches.
- unmatched_a (np.ndarray): Array of unmatched indices from the first set, with shape (L,) for L unmatched rows.
- unmatched_b (np.ndarray): Array of unmatched indices from the second set, with shape (M,) for M unmatched columns.
Examples:
>>> cost_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
>>> thresh = 5.0
>>> matched_indices, unmatched_a, unmatched_b = linear_assignment(cost_matrix, thresh, use_lap=True)
"""
if cost_matrix.size == 0:
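When use_lap is False, the scipy branch has to enforce thresh itself, since scipy.optimize.linear_sum_assignment has no cost limit. A minimal sketch of that fallback (the helper name is illustrative):

import numpy as np
from scipy.optimize import linear_sum_assignment

def assign_with_scipy(cost_matrix, thresh):
    """Sketch: optimal assignment, then drop pairs whose cost exceeds thresh."""
    rows, cols = linear_sum_assignment(cost_matrix)
    matches = np.asarray([[r, c] for r, c in zip(rows, cols) if cost_matrix[r, c] <= thresh])
    matched_a = {m[0] for m in matches}
    matched_b = {m[1] for m in matches}
    unmatched_a = np.asarray([i for i in range(cost_matrix.shape[0]) if i not in matched_a])
    unmatched_b = np.asarray([j for j in range(cost_matrix.shape[1]) if j not in matched_b])
    return matches, unmatched_a, unmatched_b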
@ -68,6 +73,12 @@ def iou_distance(atracks: list, btracks: list) -> np.ndarray:
Returns:
(np.ndarray): Cost matrix computed based on IoU.
Examples:
Compute IoU distance between two sets of tracks
>>> atracks = [np.array([0, 0, 10, 10]), np.array([20, 20, 30, 30])]
>>> btracks = [np.array([5, 5, 15, 15]), np.array([25, 25, 35, 35])]
>>> cost_matrix = iou_distance(atracks, btracks)
"""
if atracks and isinstance(atracks[0], np.ndarray) or btracks and isinstance(btracks[0], np.ndarray):
@ -98,12 +109,19 @@ def embedding_distance(tracks: list, detections: list, metric: str = "cosine") -
Compute distance between tracks and detections based on embeddings.
Args:
tracks (list[STrack]): List of tracks.
detections (list[BaseTrack]): List of detections.
metric (str, optional): Metric for distance computation. Defaults to 'cosine'.
tracks (list[STrack]): List of tracks, where each track contains embedding features.
detections (list[BaseTrack]): List of detections, where each detection contains embedding features.
metric (str): Metric for distance computation. Supported metrics include 'cosine', 'euclidean', etc.
Returns:
(np.ndarray): Cost matrix computed based on embeddings.
(np.ndarray): Cost matrix computed based on embeddings with shape (N, M), where N is the number of tracks
and M is the number of detections.
Examples:
Compute the embedding distance between tracks and detections using cosine metric
>>> tracks = [STrack(...), STrack(...)] # List of track objects with embedding features
>>> detections = [BaseTrack(...), BaseTrack(...)] # List of detection objects with embedding features
>>> cost_matrix = embedding_distance(tracks, detections, metric='cosine')
"""
cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32)
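The typical body of this function stacks the per-object feature vectors and hands them to scipy's cdist; a hedged sketch of that computation:

import numpy as np
from scipy.spatial.distance import cdist

def embedding_cost(track_feats, det_feats, metric="cosine"):
    """Sketch: (N, M) appearance cost matrix from two lists of feature vectors."""
    if not track_feats or not det_feats:
        return np.zeros((len(track_feats), len(det_feats)), dtype=np.float32)
    # Clip at 0 to guard against tiny negative values from cosine distance round-off
    return np.maximum(0.0, cdist(np.stack(track_feats), np.stack(det_feats), metric))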
@ -122,11 +140,17 @@ def fuse_score(cost_matrix: np.ndarray, detections: list) -> np.ndarray:
Fuses cost matrix with detection scores to produce a single similarity matrix.
Args:
cost_matrix (np.ndarray): The matrix containing cost values for assignments.
detections (list[BaseTrack]): List of detections with scores.
cost_matrix (np.ndarray): The matrix containing cost values for assignments, with shape (N, M).
detections (list[BaseTrack]): List of detections, each containing a score attribute.
Returns:
(np.ndarray): Fused similarity matrix.
(np.ndarray): Fused similarity matrix with shape (N, M).
Examples:
Fuse a cost matrix with detection scores
>>> cost_matrix = np.random.rand(5, 10) # 5 tracks and 10 detections
>>> detections = [BaseTrack(score=np.random.rand()) for _ in range(10)]
>>> fused_matrix = fuse_score(cost_matrix, detections)
"""
if cost_matrix.size == 0:
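Conceptually, the fusion converts the IoU cost to a similarity, scales it by each detection's confidence, and converts back to a cost. A minimal sketch over plain arrays (extraction of scores from detection objects is omitted):

import numpy as np

def fuse_score_sketch(cost_matrix, det_scores):
    """Sketch: weight IoU similarity by detection confidence, return a cost matrix."""
    iou_sim = 1 - cost_matrix                                # cost -> similarity
    scores = np.broadcast_to(det_scores, cost_matrix.shape)  # (M,) scores -> (N, M)
    return 1 - iou_sim * scores                              # fused similarity -> cost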

@ -47,7 +47,7 @@ PYTHON_VERSION = platform.python_version()
TORCH_VERSION = torch.__version__
TORCHVISION_VERSION = importlib.metadata.version("torchvision") # faster than importing torchvision
HELP_MSG = """
Usage examples for running Ultralytics YOLO:
Examples for running Ultralytics:
1. Install the ultralytics package:

@ -11,19 +11,44 @@ from pathlib import Path
class WorkingDirectory(contextlib.ContextDecorator):
"""Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager."""
"""
A context manager and decorator for temporarily changing the working directory.
This class allows for the temporary change of the working directory using a context manager or decorator.
It ensures that the original working directory is restored after the context or decorated function completes.
Attributes:
dir (Path): The new directory to switch to.
cwd (Path): The original current working directory before the switch.
Methods:
__enter__: Changes the current directory to the specified directory.
__exit__: Restores the original working directory on context exit.
Examples:
Using as a context manager:
>>> with WorkingDirectory('/path/to/new/dir'):
>>> # Perform operations in the new directory
>>> pass
Using as a decorator:
>>> @WorkingDirectory('/path/to/new/dir')
>>> def some_function():
>>> # Perform operations in the new directory
>>> pass
"""
def __init__(self, new_dir):
"""Sets the working directory to 'new_dir' upon instantiation."""
"""Sets the working directory to 'new_dir' upon instantiation for use with context managers or decorators."""
self.dir = new_dir # new dir
self.cwd = Path.cwd().resolve() # current dir
def __enter__(self):
"""Changes the current directory to the specified directory."""
"""Changes the current working directory to the specified directory upon entering the context."""
os.chdir(self.dir)
def __exit__(self, exc_type, exc_val, exc_tb): # noqa
"""Restore the current working directory on context exit."""
"""Restores the original working directory when exiting the context."""
os.chdir(self.cwd)
@ -35,18 +60,16 @@ def spaces_in_path(path):
file/directory back to its original location.
Args:
path (str | Path): The original path.
path (str | Path): The original path that may contain spaces.
Yields:
(Path): Temporary path with spaces replaced by underscores if spaces were present, otherwise the original path.
Example:
```python
with ultralytics.utils.files import spaces_in_path
with spaces_in_path('/path/with spaces') as new_path:
# Your code here
```
Examples:
Use the context manager to handle paths with spaces:
>>> from ultralytics.utils.files import spaces_in_path
>>> with spaces_in_path('/path/with spaces') as new_path:
>>> # Your code here
"""
# If path has spaces, replace them with underscores
@ -84,21 +107,35 @@ def spaces_in_path(path):
def increment_path(path, exist_ok=False, sep="", mkdir=False):
"""
Increments a file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.
Increments a file or directory path, i.e., runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.
If the path exists and exist_ok is not set to True, the path will be incremented by appending a number and sep to
If the path exists and `exist_ok` is not True, the path will be incremented by appending a number and `sep` to
the end of the path. If the path is a file, the file extension will be preserved. If the path is a directory, the
number will be appended directly to the end of the path. If mkdir is set to True, the path will be created as a
number will be appended directly to the end of the path. If `mkdir` is set to True, the path will be created as a
directory if it does not already exist.
Args:
path (str, pathlib.Path): Path to increment.
exist_ok (bool, optional): If True, the path will not be incremented and returned as-is. Defaults to False.
sep (str, optional): Separator to use between the path and the incrementation number. Defaults to ''.
mkdir (bool, optional): Create a directory if it does not exist. Defaults to False.
path (str | pathlib.Path): Path to increment.
exist_ok (bool): If True, the path will not be incremented and returned as-is.
sep (str): Separator to use between the path and the incrementation number.
mkdir (bool): Create a directory if it does not exist.
Returns:
(pathlib.Path): Incremented path.
Examples:
Increment a directory path:
>>> from pathlib import Path
>>> path = Path("runs/exp")
>>> new_path = increment_path(path)
>>> print(new_path)
runs/exp2
Increment a file path:
>>> path = Path("runs/exp/results.txt")
>>> new_path = increment_path(path)
>>> print(new_path)
runs/exp/results2.txt
"""
path = Path(path) # os-agnostic
if path.exists() and not exist_ok:
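Past the existence check shown above, the increment itself is a probe loop that preserves file extensions. A minimal sketch of that behavior (the function name is illustrative):

from pathlib import Path

def increment_sketch(path, sep=""):
    """Sketch: probe runs/exp2, runs/exp3, ... until an unused path is found."""
    path = Path(path)
    stem, suffix = (path.with_suffix(""), path.suffix) if path.is_file() else (path, "")
    for n in range(2, 9999):
        candidate = Path(f"{stem}{sep}{n}{suffix}")  # e.g. runs/exp -> runs/exp2
        if not candidate.exists():
            return candidate
    return path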
@ -118,19 +155,19 @@ def increment_path(path, exist_ok=False, sep="", mkdir=False):
def file_age(path=__file__):
"""Return days since last file update."""
"""Return days since the last modification of the specified file."""
dt = datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime) # delta
return dt.days # + dt.seconds / 86400 # fractional days
def file_date(path=__file__):
"""Return human-readable file modification date, i.e. '2021-3-26'."""
"""Returns the file modification date in 'YYYY-M-D' format."""
t = datetime.fromtimestamp(Path(path).stat().st_mtime)
return f"{t.year}-{t.month}-{t.day}"
def file_size(path):
"""Return file/dir size (MB)."""
"""Returns the size of a file or directory in megabytes (MB)."""
if isinstance(path, (str, Path)):
mb = 1 << 20 # bytes to MiB (1024 ** 2)
path = Path(path)
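For directories, the size is typically a recursive sum over contained files; a hedged sketch using the same 1 << 20 MiB constant shown above:

from pathlib import Path

def size_mb(path):
    """Sketch: size of a file, or recursive size of a directory, in MiB."""
    mb = 1 << 20  # bytes per MiB
    p = Path(path)
    if p.is_file():
        return p.stat().st_size / mb
    if p.is_dir():
        return sum(f.stat().st_size for f in p.glob("**/*") if f.is_file()) / mb
    return 0.0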
@ -142,7 +179,7 @@ def file_size(path):
def get_latest_run(search_dir="."):
"""Return path to most recent 'last.pt' in /runs (i.e. to --resume from)."""
"""Returns the path to the most recent 'last.pt' file in the specified directory for resuming training."""
last_list = glob.glob(f"{search_dir}/**/last*.pt", recursive=True)
return max(last_list, key=os.path.getctime) if last_list else ""
@ -152,17 +189,15 @@ def update_models(model_names=("yolov8n.pt",), source_dir=Path("."), update_name
Updates and re-saves specified YOLO models in an 'updated_models' subdirectory.
Args:
model_names (tuple, optional): Model filenames to update, defaults to ("yolov8n.pt").
source_dir (Path, optional): Directory containing models and target subdirectory, defaults to current directory.
update_names (bool, optional): Update model names from a data YAML.
Example:
```python
from ultralytics.utils.files import update_models
model_names = (f"rtdetr-{size}.pt" for size in "lx")
update_models(model_names)
```
model_names (Tuple[str, ...]): Model filenames to update.
source_dir (Path): Directory containing models and target subdirectory.
update_names (bool): Update model names from a data YAML.
Examples:
Update specified YOLO models and save them in 'updated_models' subdirectory:
>>> from ultralytics.utils.files import update_models
>>> model_names = ("yolov8n.pt", "yolov8s.pt")
>>> update_models(model_names, source_dir=Path("/models"), update_names=True)
"""
from ultralytics import YOLO
from ultralytics.nn.autobackend import default_class_names

@ -369,7 +369,7 @@ class Annotator:
# Convert im back to PIL and update draw
self.fromarray(self.im)
def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True, conf_thres=0.25):
def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True, conf_thres=0.25, kpt_color=None):
"""
Plot keypoints on the image.
@ -379,6 +379,7 @@ class Annotator:
radius (int, optional): Radius of the drawn keypoints. Default is 5.
kpt_line (bool, optional): If True, the function will draw lines connecting keypoints
for human pose. Default is True.
kpt_color (tuple, optional): The color of the keypoints (B, G, R).
Note:
`kpt_line=True` currently only supports human pose plotting.
@ -391,7 +392,7 @@ class Annotator:
is_pose = nkpt == 17 and ndim in {2, 3}
kpt_line &= is_pose # `kpt_line=True` for now only supports human pose plotting
for i, k in enumerate(kpts):
color_k = [int(x) for x in self.kpt_color[i]] if is_pose else colors(i)
color_k = kpt_color or (self.kpt_color[i].tolist() if is_pose else colors(i))
x_coord, y_coord = k[0], k[1]
if x_coord % shape[1] != 0 and y_coord % shape[0] != 0:
if len(k) == 3:
@ -414,7 +415,14 @@ class Annotator:
continue
if pos2[0] % shape[1] == 0 or pos2[1] % shape[0] == 0 or pos2[0] < 0 or pos2[1] < 0:
continue
cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[i]], thickness=2, lineType=cv2.LINE_AA)
cv2.line(
self.im,
pos1,
pos2,
kpt_color or self.limb_color[i].tolist(),
thickness=2,
lineType=cv2.LINE_AA,
)
if self.pil:
# Convert im back to PIL and update draw
self.fromarray(self.im)
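The new kpt_color argument overrides the per-index palette for both the keypoints and the connecting limbs. A minimal usage sketch (array shapes follow the 17-keypoint human-pose convention assumed above):

import numpy as np
from ultralytics.utils.plotting import Annotator

im = np.zeros((640, 640, 3), dtype=np.uint8)  # blank canvas
kpts = np.random.rand(17, 3) * [640, 640, 1]  # 17 (x, y, conf) keypoints
annotator = Annotator(im)
annotator.kpts(kpts, shape=(640, 640), kpt_color=(0, 255, 0))  # uniform green points and limbs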
