Merge branch 'main' into mask_select_strat

pull/16826/head
Authored by Laughing 1 week ago, committed by GitHub
commit 7442e6a752
70 changed files (changed line counts in parentheses):

1. .github/ISSUE_TEMPLATE/bug-report.yml (4)
2. .github/workflows/ci.yaml (47)
3. .github/workflows/docker.yaml (18)
4. .github/workflows/docs.yml (6)
5. .github/workflows/format.yml (2)
6. .github/workflows/links.yml (20)
7. .github/workflows/publish.yml (19)
8. .gitignore (1)
9. docker/Dockerfile (1)
10. docker/Dockerfile-cpu (12)
11. docker/Dockerfile-runner (1)
12. docs/en/datasets/index.md (1)
13. docs/en/datasets/pose/dog-pose.md (141)
14. docs/en/datasets/pose/index.md (9)
15. docs/en/guides/analytics.md (131)
16. docs/en/guides/distance-calculation.md (1)
17. docs/en/guides/heatmaps.md (166)
18. docs/en/guides/object-counting.md (165)
19. docs/en/guides/queue-management.md (52)
20. docs/en/guides/region-counting.md (85)
21. docs/en/guides/speed-estimation.md (12)
22. docs/en/guides/streamlit-live-inference.md (12)
23. docs/en/guides/workouts-monitoring.md (36)
24. docs/en/hub/models.md (6)
25. docs/en/integrations/albumentations.md (39)
26. docs/en/integrations/index.md (2)
27. docs/en/integrations/sony-imx500.md (325)
28. docs/en/macros/export-table.md (1)
29. docs/en/macros/predict-args.md (2)
30. docs/en/macros/train-args.md (1)
31. docs/en/macros/validation-args.md (2)
32. docs/en/modes/benchmark.md (21)
33. docs/en/reference/solutions/region_counter.md (16)
34. docs/en/reference/utils/torch_utils.md (4)
35. docs/mkdocs_github_authors.yaml (6)
36. docs/overrides/javascript/extra.js (343)
37. docs/overrides/javascript/giscus.js (19)
38. docs/overrides/stylesheets/style.css (11)
39. mkdocs.yml (10)
40. tests/test_exports.py (9)
41. ultralytics/__init__.py (2)
42. ultralytics/cfg/__init__.py (3)
43. ultralytics/cfg/datasets/dog-pose.yaml (23)
44. ultralytics/data/augment.py (5)
45. ultralytics/data/converter.py (5)
46. ultralytics/engine/exporter.py (179)
47. ultralytics/engine/model.py (4)
48. ultralytics/engine/results.py (6)
49. ultralytics/engine/trainer.py (17)
50. ultralytics/models/fastsam/predict.py (3)
51. ultralytics/models/rtdetr/train.py (3)
52. ultralytics/models/yolo/detect/train.py (7)
53. ultralytics/models/yolo/detect/val.py (4)
54. ultralytics/models/yolo/pose/val.py (4)
55. ultralytics/models/yolo/segment/val.py (4)
56. ultralytics/nn/autobackend.py (27)
57. ultralytics/nn/modules/block.py (3)
58. ultralytics/nn/modules/head.py (12)
59. ultralytics/solutions/__init__.py (2)
60. ultralytics/solutions/analytics.py (2)
61. ultralytics/solutions/heatmap.py (4)
62. ultralytics/solutions/object_counter.py (78)
63. ultralytics/solutions/region_counter.py (112)
64. ultralytics/solutions/solutions.py (2)
65. ultralytics/utils/autobatch.py (12)
66. ultralytics/utils/benchmarks.py (7)
67. ultralytics/utils/callbacks/raytune.py (3)
68. ultralytics/utils/callbacks/wb.py (9)
69. ultralytics/utils/tal.py (44)
70. ultralytics/utils/torch_utils.py (55)

@ -52,9 +52,9 @@ body:
- type: textarea
attributes:
label: Environment
description: Many issues are often related to dependency versions and hardware. Please provide the output of `yolo checks` or `ultralytics.checks()` command to help us diagnose the problem.
description: Many issues are often related to dependency versions and hardware. Please provide the output of `yolo checks` (CLI) or `ultralytics.utils.checks.collect_system_info()` (Python) command to help us diagnose the problem.
placeholder: |
Paste output of `yolo checks` or `ultralytics.checks()` command, i.e.:
Paste output of `yolo checks` (CLI) or `ultralytics.utils.checks.collect_system_info()` (Python) command, i.e.:
```
Ultralytics 8.3.2 🚀 Python-3.11.2 torch-2.4.1 CPU (Apple M3)
Setup complete ✅ (8 CPUs, 16.0 GB RAM, 266.5/460.4 GB disk)
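For reference, the Python-side command named in the updated template can be called directly; a minimal sketch (the `collect_system_info` import path is taken from the template text above):

```python
from ultralytics.utils.checks import collect_system_info

# Print Python, PyTorch, hardware and installed-package details to paste into a bug report
collect_system_info()
```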

@ -52,16 +52,15 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "pip" # caching pip dependencies
- uses: astral-sh/setup-uv@v3
- name: Install requirements
shell: bash # for Windows compatibility
run: |
python -m pip install --upgrade pip wheel
pip install . --extra-index-url https://download.pytorch.org/whl/cpu
uv pip install --system . --extra-index-url https://download.pytorch.org/whl/cpu
- name: Check environment
run: |
yolo checks
pip list
uv pip list
- name: Test HUB training
shell: python
env:
@ -111,6 +110,7 @@ jobs:
- name: Install requirements
shell: bash # for Windows compatibility
run: |
# Warnings: uv causes numpy errors during benchmarking
python -m pip install --upgrade pip wheel
pip install -e ".[export]" "coverage[toml]" --extra-index-url https://download.pytorch.org/whl/cpu
- name: Check environment
@ -143,7 +143,7 @@ jobs:
coverage xml -o coverage-benchmarks.xml
- name: Upload Coverage Reports to CodeCov
if: github.repository == 'ultralytics/ultralytics'
uses: codecov/codecov-action@v4
uses: codecov/codecov-action@v5
with:
flags: Benchmarks
env:
@ -172,12 +172,11 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "pip" # caching pip dependencies
- uses: astral-sh/setup-uv@v3
- name: Install requirements
shell: bash # for Windows compatibility
run: |
# CoreML must be installed before export due to protobuf error from AutoInstall
python -m pip install --upgrade pip wheel
slow=""
torch=""
if [ "${{ matrix.torch }}" == "1.8.0" ]; then
@ -186,11 +185,11 @@ jobs:
if [[ "${{ github.event_name }}" =~ ^(schedule|workflow_dispatch)$ ]]; then
slow="pycocotools mlflow"
fi
pip install -e ".[export]" $torch $slow pytest-cov --extra-index-url https://download.pytorch.org/whl/cpu
uv pip install --system -e ".[export]" $torch $slow pytest-cov --extra-index-url https://download.pytorch.org/whl/cpu
- name: Check environment
run: |
yolo checks
pip list
uv pip list
- name: Pytest tests
shell: bash # for Windows compatibility
run: |
@ -201,7 +200,7 @@ jobs:
pytest $slow --cov=ultralytics/ --cov-report xml tests/
- name: Upload Coverage Reports to CodeCov
if: github.repository == 'ultralytics/ultralytics' # && matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11'
uses: codecov/codecov-action@v4
uses: codecov/codecov-action@v5
with:
flags: Tests
env:
@ -213,12 +212,13 @@ jobs:
runs-on: gpu-latest
steps:
- uses: actions/checkout@v4
- uses: astral-sh/setup-uv@v3
- name: Install requirements
run: pip install . pytest-cov
run: uv pip install --system . pytest-cov
- name: Check environment
run: |
yolo checks
pip list
uv pip list
- name: Pytest tests
run: |
slow=""
@ -227,7 +227,7 @@ jobs:
fi
pytest $slow --cov=ultralytics/ --cov-report xml tests/test_cuda.py
- name: Upload Coverage Reports to CodeCov
uses: codecov/codecov-action@v4
uses: codecov/codecov-action@v5
with:
flags: GPU
env:
@ -294,13 +294,8 @@ jobs:
channels: conda-forge,defaults
channel-priority: true
activate-environment: anaconda-client-env
- name: Cleanup toolcache
run: |
echo "Free space before deletion:"
df -h /
rm -rf /opt/hostedtoolcache
echo "Free space after deletion:"
df -h /
- name: Cleanup disk space
uses: ultralytics/actions/cleanup-disk@main
- name: Install Linux packages
run: |
# Fix cv2 ImportError: 'libEGL.so.1: cannot open shared object file: No such file or directory'
@ -348,14 +343,14 @@ jobs:
Summary:
runs-on: ubuntu-latest
needs: [HUB, Benchmarks, Tests, GPU, RaspberryPi, Conda] # Add job names that you want to check for failure
if: always() # This ensures the job runs even if previous jobs fail
needs: [HUB, Benchmarks, Tests, GPU, RaspberryPi, Conda]
if: always()
steps:
- name: Check for failure and notify
if: (needs.HUB.result == 'failure' || needs.Benchmarks.result == 'failure' || needs.Tests.result == 'failure' || needs.GPU.result == 'failure' || needs.RaspberryPi.result == 'failure' || needs.Conda.result == 'failure' ) && github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push') && github.run_attempt == '1'
uses: slackapi/slack-github-action@v1.27.0
uses: slackapi/slack-github-action@v2.0.0
with:
webhook-type: incoming-webhook
webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
payload: |
{"text": "<!channel> GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"}
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
text: "<!channel> GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"

@ -134,12 +134,12 @@ jobs:
- name: Build Image
if: github.event_name == 'push' || github.event.inputs[matrix.dockerfile] == 'true'
uses: nick-invision/retry@v3
uses: ultralytics/actions/retry@main
with:
timeout_minutes: 120
retry_wait_seconds: 60
max_attempts: 3 # retry twice
command: |
retry_delay_seconds: 60
retries: 2
run: |
docker build \
--platform ${{ matrix.platforms }} \
-f docker/${{ matrix.dockerfile }} \
@ -172,7 +172,7 @@ jobs:
fi
if [[ "${{ matrix.tags }}" == "latest-python" ]]; then
t=ultralytics/ultralytics:latest-jupyter
v=ultralytics/ultralytics:${{ steps.get_version.outputs.version_tag }}-jupyter
v=ultralytics/ultralytics:${{ steps.get_version.outputs.version }}-jupyter
docker build -f docker/Dockerfile-jupyter -t $t -t $v .
docker push $t
if [[ "${{ steps.check_tag.outputs.new_release }}" == "true" ]]; then
@ -202,9 +202,9 @@ jobs:
steps:
- name: Check for failure and notify
if: needs.docker.result == 'failure' && github.repository == 'ultralytics/ultralytics' && github.event_name == 'push' && github.run_attempt == '1'
uses: slackapi/slack-github-action@v1.27.0
uses: slackapi/slack-github-action@v2.0.0
with:
webhook-type: incoming-webhook
webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
payload: |
{"text": "<!channel> GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"}
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
text: "<!channel> GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"

@ -29,7 +29,7 @@ on:
jobs:
Docs:
if: github.repository == 'ultralytics/ultralytics'
runs-on: macos-14
runs-on: ubuntu-latest
steps:
- name: Git config
run: |
@ -46,9 +46,9 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: "3.x"
cache: "pip" # caching pip dependencies
- uses: astral-sh/setup-uv@v3
- name: Install Dependencies
run: pip install ruff black tqdm minify-html mkdocs-material "mkdocstrings[python]" mkdocs-jupyter mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin
run: uv pip install --system ruff black tqdm minify-html mkdocs-material "mkdocstrings[python]" mkdocs-jupyter mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin
- name: Ruff fixes
continue-on-error: true
run: ruff check --fix --unsafe-fixes --select D --ignore=D100,D104,D203,D205,D212,D213,D401,D406,D407,D413 .

@ -15,7 +15,7 @@ on:
jobs:
format:
runs-on: macos-14
runs-on: ubuntu-latest
steps:
- name: Run Ultralytics Formatting
uses: ultralytics/actions@main

@ -29,12 +29,12 @@ jobs:
sudo mv lychee /usr/local/bin
- name: Test Markdown and HTML links with retry
uses: nick-invision/retry@v3
uses: ultralytics/actions/retry@main
with:
timeout_minutes: 5
retry_wait_seconds: 60
max_attempts: 3
command: |
timeout_minutes: 60
retry_delay_seconds: 900
retries: 2
run: |
lychee \
--scheme https \
--timeout 60 \
@ -59,12 +59,12 @@ jobs:
- name: Test Markdown, HTML, YAML, Python and Notebook links with retry
if: github.event_name == 'workflow_dispatch'
uses: nick-invision/retry@v3
uses: ultralytics/actions/retry@main
with:
timeout_minutes: 5
retry_wait_seconds: 60
max_attempts: 3
command: |
timeout_minutes: 60
retry_delay_seconds: 900
retries: 2
run: |
lychee \
--scheme https \
--timeout 60 \

@ -17,7 +17,7 @@ jobs:
if: github.repository == 'ultralytics/ultralytics' && github.actor == 'glenn-jocher'
name: Publish
runs-on: ubuntu-latest
environment: # for GitHub Deployments tab
environment: # for GitHub Deployments tab
name: Release - PyPI
url: https://pypi.org/p/ultralytics
permissions:
@ -90,19 +90,20 @@ jobs:
fi
echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV
echo "PR_TITLE=$PR_TITLE" >> $GITHUB_ENV
- name: Notify on Slack (Success)
if: success() && github.event_name == 'push' && steps.check_pypi.outputs.increment == 'True'
uses: slackapi/slack-github-action@v1.27.0
uses: slackapi/slack-github-action@v2.0.0
with:
webhook-type: incoming-webhook
webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
payload: |
{"text": "<!channel> GitHub Actions success for ${{ github.workflow }} ✅\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW `${{ github.repository }} ${{ steps.check_pypi.outputs.current_tag }}` pip package published 😃\n*Job Status:* ${{ job.status }}\n*Pull Request:* <https://github.com/${{ github.repository }}/pull/${{ env.PR_NUMBER }}> ${{ env.PR_TITLE }}\n"}
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
text: "<!channel> GitHub Actions success for ${{ github.workflow }} ✅\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW `${{ github.repository }} ${{ steps.check_pypi.outputs.current_tag }}` pip package published 😃\n*Job Status:* ${{ job.status }}\n*Pull Request:* <https://github.com/${{ github.repository }}/pull/${{ env.PR_NUMBER }}> ${{ env.PR_TITLE }}\n"
- name: Notify on Slack (Failure)
if: failure()
uses: slackapi/slack-github-action@v1.27.0
uses: slackapi/slack-github-action@v2.0.0
with:
webhook-type: incoming-webhook
webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
payload: |
{"text": "<!channel> GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n*Job Status:* ${{ job.status }}\n*Pull Request:* <https://github.com/${{ github.repository }}/pull/${{ env.PR_NUMBER }}> ${{ env.PR_TITLE }}\n"}
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
text: "<!channel> GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n*Job Status:* ${{ job.status }}\n*Pull Request:* <https://github.com/${{ github.repository }}/pull/${{ env.PR_NUMBER }}> ${{ env.PR_TITLE }}\n"

.gitignore

@ -163,6 +163,7 @@ weights/
*_openvino_model/
*_paddle_model/
*_ncnn_model/
*_imx_model/
pnnx*
# Autogenerated files for tests

@ -56,7 +56,6 @@ RUN pip install numpy==1.23.5
# Remove extra build files
RUN rm -rf tmp /root/.config/Ultralytics/persistent_cache.json
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push

@ -2,8 +2,8 @@
# Builds ultralytics/ultralytics:latest-cpu image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics
# Image is CPU-optimized for ONNX, OpenVINO and PyTorch YOLO11 deployments
# Start FROM Ubuntu image https://hub.docker.com/_/ubuntu
FROM ubuntu:23.10
# Use official Python base image for reproducibility (3.11.10 for export and 3.12.6 for inference)
FROM python:3.11.10-slim-bookworm
# Set environment variables
ENV PYTHONUNBUFFERED=1 \
@ -39,14 +39,14 @@ RUN pip install -e ".[export]" --extra-index-url https://download.pytorch.org/wh
RUN yolo export model=tmp/yolo11n.pt format=edgetpu imgsz=32
RUN yolo export model=tmp/yolo11n.pt format=ncnn imgsz=32
# Requires Python<=3.10, bug with paddlepaddle==2.5.0 https://github.com/PaddlePaddle/X2Paddle/issues/991
# RUN pip install "paddlepaddle>=2.6.0" x2paddle
# Creates a symbolic link to make 'python' point to 'python3'
RUN ln -sf /usr/bin/python3 /usr/bin/python
RUN pip install "paddlepaddle>=2.6.0" x2paddle
# Remove extra build files
RUN rm -rf tmp /root/.config/Ultralytics/persistent_cache.json
# Set default command to bash
CMD ["/bin/bash"]
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push

@ -35,7 +35,6 @@ ENTRYPOINT sh -c './config.sh --url https://github.com/ultralytics/ultralytics \
--replace && \
./run.sh'
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push

@ -74,6 +74,7 @@ Pose estimation is a technique used to determine the pose of the object relative
- [COCO8-pose](pose/coco8-pose.md): A smaller dataset for pose estimation tasks, containing a subset of 8 COCO images with human pose annotations.
- [Tiger-pose](pose/tiger-pose.md): A compact dataset consisting of 263 images focused on tigers, annotated with 12 keypoints per tiger for pose estimation tasks.
- [Hand-Keypoints](pose/hand-keypoints.md): A concise dataset featuring over 26,000 images centered on human hands, annotated with 21 keypoints per hand, designed for pose estimation tasks.
- [Dog-pose](pose/dog-pose.md): A comprehensive dataset featuring approximately 6,000 images focused on dogs, annotated with 24 keypoints per dog, tailored for pose estimation tasks.
## [Classification](classify/index.md)

@ -0,0 +1,141 @@
---
comments: true
description: Discover the Dog-Pose dataset for pose detection. Featuring 6,773 training and 1,703 test images, it's a robust dataset for training YOLO11 models.
keywords: Dog-Pose, Ultralytics, pose detection dataset, YOLO11, machine learning, computer vision, training data
---
# Dog-Pose Dataset
## Introduction
The [Ultralytics](https://www.ultralytics.com/) Dog-pose dataset is a high-quality and extensive dataset specifically curated for dog keypoint estimation. With 6,773 training images and 1,703 test images, this dataset provides a solid foundation for training robust pose estimation models. Each annotated image includes 24 keypoints with 3 dimensions per keypoint (x, y, visibility), making it a valuable resource for advanced research and development in computer vision.
<img src="https://github.com/ultralytics/docs/releases/download/0/ultralytics-dogs.avif" alt="Ultralytics Dog-pose display image" width="800">
This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics).
## Dataset YAML
A YAML (YAML Ain't Markup Language) file is used to define the dataset configuration. It includes paths, keypoint details, and other relevant information. For the Dog-pose dataset, the `dog-pose.yaml` file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dog-pose.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dog-pose.yaml).
!!! example "ultralytics/cfg/datasets/dog-pose.yaml"
```yaml
--8<-- "ultralytics/cfg/datasets/dog-pose.yaml"
```
## Usage
To train a YOLO11n-pose model on the Dog-pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training)
# Train the model
results = model.train(data="dog-pose.yaml", epochs=100, imgsz=640)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
yolo pose train data=dog-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
Here are some examples of images from the Dog-pose dataset, along with their corresponding annotations:
<img src="https://github.com/ultralytics/docs/releases/download/0/mosaiced-training-batch-2-dog-pose.avif" alt="Dataset sample image" width="800">
- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts.
The example showcases the variety and complexity of the images in the Dog-pose dataset and the benefits of using mosaicing during the training process.
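Mosaic behavior is controlled through standard Ultralytics train arguments; a minimal sketch (argument values are illustrative, not recommendations):

```python
from ultralytics import YOLO

model = YOLO("yolo11n-pose.pt")

# mosaic sets the augmentation probability; close_mosaic disables it for the final epochs
model.train(data="dog-pose.yaml", epochs=100, imgsz=640, mosaic=1.0, close_mosaic=10)
```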
## Citations and Acknowledgments
If you use the Dog-pose dataset in your research or development work, please cite the following paper:
!!! quote ""
=== "BibTeX"
```bibtex
@inproceedings{khosla2011fgvc,
title={Novel dataset for Fine-Grained Image Categorization},
author={Aditya Khosla and Nityananda Jayadevaprakash and Bangpeng Yao and Li Fei-Fei},
booktitle={First Workshop on Fine-Grained Visual Categorization (FGVC), IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
year={2011}
}
@inproceedings{deng2009imagenet,
title={ImageNet: A Large-Scale Hierarchical Image Database},
author={Jia Deng and Wei Dong and Richard Socher and Li-Jia Li and Kai Li and Li Fei-Fei},
booktitle={IEEE Computer Vision and Pattern Recognition (CVPR)},
year={2009}
}
```
We would like to acknowledge the Stanford team for creating and maintaining this valuable resource for the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) community. For more information about the Dog-pose dataset and its creators, visit the [Stanford Dogs Dataset website](http://vision.stanford.edu/aditya86/ImageNetDogs/).
## FAQ
### What is the Dog-pose dataset, and how is it used with Ultralytics YOLO11?
The Dog-pose dataset features approximately 6,000 images annotated with 24 keypoints per dog for pose estimation. Ideal for training and validating models with [Ultralytics YOLO11](https://docs.ultralytics.com/models/yolo11/), it supports applications like animal behavior analysis and veterinary studies.
### How do I train a YOLO11 model using the Dog-pose dataset in Ultralytics?
To train a YOLO11n-pose model on the Dog-pose dataset for 100 epochs with an image size of 640, follow these examples:
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO("yolo11n-pose.pt")
# Train the model
results = model.train(data="dog-pose.yaml", epochs=100, imgsz=640)
```
=== "CLI"
```bash
yolo pose train data=dog-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640
```
For a comprehensive list of training arguments, refer to the model [Training](../../modes/train.md) page.
### What are the benefits of using the Dog-pose dataset?
The Dog-pose dataset offers several benefits:
- **Large and Diverse Dataset**: With 6,000 images, it provides a substantial amount of data covering a wide range of dog poses, breeds, and contexts, enabling robust model training and evaluation.
- **Pose-specific Annotations**: Offers detailed annotations for pose estimation, ensuring high-quality data for training pose detection models.
- **Real-World Scenarios**: Includes images from varied environments, enhancing the model's ability to generalize to real-world applications.
- **Model Performance Improvement**: The diversity and scale of the dataset help improve model accuracy and robustness, particularly for tasks involving fine-grained pose estimation.
For more about its features and usage, see the [Dataset Introduction](#introduction) section.
### How does mosaicing benefit the YOLO11 training process using the Dog-pose dataset?
Mosaicing, as illustrated in the sample images from the Dog-pose dataset, merges multiple images into a single composite, enriching the diversity of objects and scenes in each training batch. This approach enhances the model's capacity to generalize across different object sizes, aspect ratios, and contexts, leading to improved performance. For example images, refer to the [Sample Images and Annotations](#sample-images-and-annotations) section.
### Where can I find the Dog-pose dataset YAML file and how do I use it?
The Dog-pose dataset YAML file can be found [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dog-pose.yaml). This file defines the dataset configuration, including paths, classes, and other relevant information. Use this file with the YOLO11 training scripts as mentioned in the [Train Example](#how-do-i-train-a-yolo11-model-using-the-dog-pose-dataset-in-ultralytics) section.
For more FAQs and detailed documentation, visit the [Ultralytics Documentation](https://docs.ultralytics.com/).
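As a follow-on sketch, a model trained with the example above can be validated and used for keypoint prediction (the weights path and image path below are hypothetical):

```python
from ultralytics import YOLO

# Hypothetical path to the best checkpoint produced by the training example above
model = YOLO("runs/pose/train/weights/best.pt")

metrics = model.val(data="dog-pose.yaml")  # evaluate pose metrics on the validation split
results = model.predict("path/to/dog.jpg")  # run keypoint prediction on a single image
```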

@ -127,6 +127,15 @@ This section outlines the datasets that are compatible with Ultralytics YOLO for
- **Usage**: Great for human hand pose estimation.
- [Read more about Hand Keypoints](hand-keypoints.md)
### Dog-Pose
- **Description**: The Dog Pose dataset contains approximately 6,000 images, providing a diverse and extensive resource for training and validation of dog pose estimation models.
- **Label Format**: Follows the Ultralytics YOLO format, with annotations for multiple keypoints specific to dog anatomy.
- **Number of Classes**: 1 (Dog).
- **Keypoints**: Includes 24 keypoints tailored to dog poses, such as limbs, joints, and head positions.
- **Usage**: Ideal for training models to estimate dog poses in various scenarios, from research to real-world applications.
- [Read more about Dog-Pose](dog-pose.md)
### Adding your own dataset
If you have your own dataset and would like to use it for training pose estimation models with Ultralytics YOLO format, ensure that it follows the format specified above under "Ultralytics YOLO format". Convert your annotations to the required format and specify the paths, number of classes, and class names in the YAML configuration file.
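Once such a YAML file exists, training follows the same pattern as the bundled pose datasets; a minimal sketch (`custom-pose.yaml` is a placeholder name for your own configuration):

```python
from ultralytics import YOLO

# "custom-pose.yaml" is a hypothetical config following the Ultralytics YOLO pose format
model = YOLO("yolo11n-pose.pt")
model.train(data="custom-pose.yaml", epochs=100, imgsz=640)
```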

@ -45,126 +45,15 @@ This guide provides a comprehensive overview of three fundamental types of [data
# generate the pie chart
yolo solutions analytics analytics_type="pie" show=True
```
=== "Python"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
out = cv2.VideoWriter(
"ultralytics_analytics.avi",
cv2.VideoWriter_fourcc(*"MJPG"),
fps,
(1920, 1080), # This is fixed
)
analytics = solutions.Analytics(
analytics_type="line",
show=True,
)
# generate the bar plots
yolo solutions analytics analytics_type="bar" show=True
frame_count = 0
while cap.isOpened():
success, im0 = cap.read()
if success:
frame_count += 1
im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame
out.write(im0) # write the video file
else:
break
cap.release()
out.release()
cv2.destroyAllWindows()
# generate the area plots
yolo solutions analytics analytics_type="area" show=True
```
=== "Pie Chart"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
out = cv2.VideoWriter(
"ultralytics_analytics.avi",
cv2.VideoWriter_fourcc(*"MJPG"),
fps,
(1920, 1080), # This is fixed
)
analytics = solutions.Analytics(
analytics_type="pie",
show=True,
)
frame_count = 0
while cap.isOpened():
success, im0 = cap.read()
if success:
frame_count += 1
im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame
out.write(im0) # write the video file
else:
break
cap.release()
out.release()
cv2.destroyAllWindows()
```
=== "Bar Plot"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
out = cv2.VideoWriter(
"ultralytics_analytics.avi",
cv2.VideoWriter_fourcc(*"MJPG"),
fps,
(1920, 1080), # This is fixed
)
analytics = solutions.Analytics(
analytics_type="bar",
show=True,
)
frame_count = 0
while cap.isOpened():
success, im0 = cap.read()
if success:
frame_count += 1
im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame
out.write(im0) # write the video file
else:
break
cap.release()
out.release()
cv2.destroyAllWindows()
```
=== "Area chart"
=== "Python"
```python
import cv2
@ -173,9 +62,9 @@ This guide provides a comprehensive overview of three fundamental types of [data
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Video writer
out = cv2.VideoWriter(
"ultralytics_analytics.avi",
cv2.VideoWriter_fourcc(*"MJPG"),
@ -183,11 +72,15 @@ This guide provides a comprehensive overview of three fundamental types of [data
(1920, 1080), # This is fixed
)
# Init analytics
analytics = solutions.Analytics(
analytics_type="area",
show=True,
show=True, # Display the output
analytics_type="line", # Pass the analytics type, could be "pie", "bar" or "area".
model="yolo11n.pt", # Path to the YOLO11 model file
# classes=[0, 2], # If you want to count specific classes i.e person and car with COCO pretrained model.
)
# Process video
frame_count = 0
while cap.isOpened():
success, im0 = cap.read()

@ -55,6 +55,7 @@ Measuring the gap between two objects is known as distance calculation within a
# Init distance-calculation obj
distance = solutions.DistanceCalculation(model="yolo11n.pt", show=True)
# Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:

@ -47,119 +47,12 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult
# Pass a custom colormap
yolo solutions heatmap colormap=cv2.COLORMAP_INFERNO
```
=== "Python"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Video writer
video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Init heatmap
heatmap = solutions.Heatmap(
show=True,
model="yolo11n.pt",
colormap=cv2.COLORMAP_PARULA,
)
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
im0 = heatmap.generate_heatmap(im0)
video_writer.write(im0)
cap.release()
video_writer.release()
cv2.destroyAllWindows()
```
=== "Line Counting"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Video writer
video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# line for object counting
line_points = [(20, 400), (1080, 404)]
# Init heatmap
heatmap = solutions.Heatmap(
show=True,
model="yolo11n.pt",
colormap=cv2.COLORMAP_PARULA,
region=line_points,
)
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
im0 = heatmap.generate_heatmap(im0)
video_writer.write(im0)
cap.release()
video_writer.release()
cv2.destroyAllWindows()
# Heatmaps + object counting
yolo solutions heatmap region=[(20, 400), (1080, 404), (1080, 360), (20, 360)]
```
=== "Polygon Counting"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Video writer
video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Define polygon points
region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)]
# Init heatmap
heatmap = solutions.Heatmap(
show=True,
model="yolo11n.pt",
colormap=cv2.COLORMAP_PARULA,
region=region_points,
)
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
im0 = heatmap.generate_heatmap(im0)
video_writer.write(im0)
cap.release()
video_writer.release()
cv2.destroyAllWindows()
```
=== "Region Counting"
=== "Python"
```python
import cv2
@ -173,51 +66,24 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult
# Video writer
video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Define region points
region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
# Init heatmap
heatmap = solutions.Heatmap(
show=True,
model="yolo11n.pt",
colormap=cv2.COLORMAP_PARULA,
region=region_points,
)
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
im0 = heatmap.generate_heatmap(im0)
video_writer.write(im0)
cap.release()
video_writer.release()
cv2.destroyAllWindows()
```
=== "Specific Classes"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Video writer
video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# In case you want to apply object counting + heatmaps, you can pass region points.
# region_points = [(20, 400), (1080, 404)] # Define line points
# region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] # Define region points
# region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)] # Define polygon points
# Init heatmap
heatmap = solutions.Heatmap(
show=True,
model="yolo11n.pt",
classes=[0, 2],
show=True, # Display the output
model="yolo11n.pt", # Path to the YOLO11 model file
colormap=cv2.COLORMAP_PARULA, # Colormap of heatmap
# region=region_points, # If you want to do object counting with heatmaps, you can pass region_points
# classes=[0, 2], # If you want to generate heatmap for specific classes i.e person and car.
# show_in=True, # Display in counts
# show_out=True, # Display out counts
# line_width=2, # Adjust the line width for bounding boxes and text display
)
# Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:

@ -19,7 +19,7 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> Object Counting using Ultralytics YOLO11
<strong>Watch:</strong> Object Counting using Ultralytics YOLOv8
</td>
<td align="center">
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/Fj9TStNBVoY"
@ -73,165 +73,22 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Define region points
region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
# region_points = [(20, 400), (1080, 400)] # For line counting
region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] # For rectangle region counting
# region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)] # For polygon region counting
# Video writer
video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Init Object Counter
counter = solutions.ObjectCounter(
show=True,
region=region_points,
model="yolo11n.pt",
)
# Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
im0 = counter.count(im0)
video_writer.write(im0)
cap.release()
video_writer.release()
cv2.destroyAllWindows()
```
=== "OBB Object Counting"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# line or region points
line_points = [(20, 400), (1080, 400)]
# Video writer
video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Init Object Counter
counter = solutions.ObjectCounter(
show=True,
region=line_points,
model="yolo11n-obb.pt",
)
# Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
im0 = counter.count(im0)
video_writer.write(im0)
cap.release()
video_writer.release()
cv2.destroyAllWindows()
```
=== "Count in Polygon"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Define region points
region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)]
# Video writer
video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Init Object Counter
counter = solutions.ObjectCounter(
show=True,
region=region_points,
model="yolo11n.pt",
)
# Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
im0 = counter.count(im0)
video_writer.write(im0)
cap.release()
video_writer.release()
cv2.destroyAllWindows()
```
=== "Count in Line"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Define region points
line_points = [(20, 400), (1080, 400)]
# Video writer
video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Init Object Counter
counter = solutions.ObjectCounter(
show=True,
region=line_points,
model="yolo11n.pt",
)
# Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
im0 = counter.count(im0)
video_writer.write(im0)
cap.release()
video_writer.release()
cv2.destroyAllWindows()
```
=== "Specific Classes"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Video writer
video_writer = cv2.VideoWriter("object_counting_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Init Object Counter
counter = solutions.ObjectCounter(
show=True,
model="yolo11n.pt",
classes=[0, 1],
show=True, # Display the output
region=region_points, # Pass region points
model="yolo11n.pt", # model="yolo11n-obb.pt" for object counting using YOLO11 OBB model.
# classes=[0, 2], # If you want to count specific classes i.e person and car with COCO pretrained model.
# show_in=True, # Display in counts
# show_out=True, # Display out counts
# line_width=2, # Adjust the line width for bounding boxes and text display
)
# Process video

@ -60,53 +60,23 @@ Queue management using [Ultralytics YOLO11](https://github.com/ultralytics/ultra
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Video writer
video_writer = cv2.VideoWriter("queue_management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
# Define queue region points
queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)] # Define queue region points
# queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)] # Define queue polygon points
# Init Queue Manager
queue = solutions.QueueManager(
model="yolo11n.pt",
region=queue_region,
)
while cap.isOpened():
success, im0 = cap.read()
if success:
out = queue.process_queue(im0)
video_writer.write(im0)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
continue
print("Video frame is empty or video processing has been successfully completed.")
break
cap.release()
cv2.destroyAllWindows()
```
=== "Queue Manager Specific Classes"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("queue_management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
queue_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
queue = solutions.QueueManager(
model="yolo11n.pt",
classes=3,
show=True, # Display the output
model="yolo11n.pt", # Path to the YOLO11 model file
region=queue_region, # Pass queue region points
# classes=[0, 2], # If you want to count specific classes i.e person and car with COCO pretrained model.
# line_width=2, # Adjust the line width for bounding boxes and text display
)
# Process video
while cap.isOpened():
success, im0 = cap.read()

@ -34,56 +34,65 @@ keywords: object counting, regions, YOLOv8, computer vision, Ultralytics, effici
| ![People Counting in Different Region using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/people-counting-different-region-ultralytics-yolov8.avif) | ![Crowd Counting in Different Region using Ultralytics YOLOv8](https://github.com/ultralytics/docs/releases/download/0/crowd-counting-different-region-ultralytics-yolov8.avif) |
| People Counting in Different Region using Ultralytics YOLOv8 | Crowd Counting in Different Region using Ultralytics YOLOv8 |
## Steps to Run
!!! example "Region Counting Example"
### Step 1: Install Required Libraries
=== "Python"
Begin by cloning the Ultralytics repository, installing dependencies, and navigating to the local directory using the provided commands in Step 2.
```python
import cv2
from ultralytics import solutions
```bash
# Clone Ultralytics repo
git clone https://github.com/ultralytics/ultralytics
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Navigate to the local directory
cd ultralytics/examples/YOLOv8-Region-Counter
```
# Define region points
# region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] # Pass region as list
### Step 2: Run Region Counting Using Ultralytics YOLOv8
# pass region as dictionary
region_points = {
"region-01": [(50, 50), (250, 50), (250, 250), (50, 250)],
"region-02": [(640, 640), (780, 640), (780, 720), (640, 720)]
}
Execute the following basic commands for inference.
# Video writer
video_writer = cv2.VideoWriter("region_counting.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
???+ tip "Region is Movable"
# Init Object Counter
region = solutions.RegionCounter(
show=True,
region=region_points,
model="yolo11n.pt",
)
During video playback, you can interactively move the region within the video by clicking and dragging using the left mouse button.
# Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
im0 = region.count(im0)
video_writer.write(im0)
```bash
# Save results
python yolov8_region_counter.py --source "path/to/video.mp4" --save-img
cap.release()
video_writer.release()
cv2.destroyAllWindows()
```
# Run model on CPU
python yolov8_region_counter.py --source "path/to/video.mp4" --device cpu
!!! tip "Ultralytics Example Code"
# Change model file
python yolov8_region_counter.py --source "path/to/video.mp4" --weights "path/to/model.pt"
The Ultralytics region counting module is available in our [examples section](https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-Region-Counter/yolov8_region_counter.py). You can explore this example for code customization and modify it to suit your specific use case.
# Detect specific classes (e.g., first and third classes)
python yolov8_region_counter.py --source "path/to/video.mp4" --classes 0 2
### Argument `RegionCounter`
# View results without saving
python yolov8_region_counter.py --source "path/to/video.mp4" --view-img
```
Here's a table with the `RegionCounter` arguments:
### Optional Arguments
| Name | Type | Default | Description |
| -------------------- | ------ | ------------ | --------------------------------------------------------------------------- |
| `--source` | `str` | `None` | Path to video file, for webcam 0 |
| `--line_thickness` | `int` | `2` | [Bounding Box](https://www.ultralytics.com/glossary/bounding-box) thickness |
| `--save-img` | `bool` | `False` | Save the predicted video/image |
| `--weights` | `str` | `yolov8n.pt` | Weights file path |
| `--classes` | `list` | `None` | Detect specific classes i.e. --classes 0 2 |
| `--region-thickness` | `int` | `2` | Region Box thickness |
| `--track-thickness` | `int` | `2` | Tracking line thickness |
| Name | Type | Default | Description |
| ------------ | ------ | -------------------------- | ---------------------------------------------------- |
| `model` | `str` | `None` | Path to Ultralytics YOLO Model File |
| `region` | `list` | `[(20, 400), (1260, 400)]` | List of points defining the counting region. |
| `line_width` | `int` | `2` | Line thickness for bounding boxes. |
| `show` | `bool` | `False` | Flag to control whether to display the video stream. |
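Putting the new pieces together, here is a runnable sketch of the `RegionCounter` solution (class name, arguments, and region points follow the example above; the video path is a placeholder):

```python
import cv2

from ultralytics import solutions

cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

# Pass regions as a dictionary of named polygons
region_points = {
    "region-01": [(50, 50), (250, 50), (250, 250), (50, 250)],
    "region-02": [(640, 640), (780, 640), (780, 720), (640, 720)],
}

# Video writer for the annotated output
video_writer = cv2.VideoWriter("region_counting.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Initialize the region counter
regioncounter = solutions.RegionCounter(show=True, region=region_points, model="yolo11n.pt")

while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        print("Video frame is empty or video processing has been successfully completed.")
        break
    im0 = regioncounter.count(im0)  # annotate the frame and update per-region counts
    video_writer.write(im0)

cap.release()
video_writer.release()
cv2.destroyAllWindows()
```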
## FAQ
@ -107,7 +116,7 @@ Follow these steps to run object counting in Ultralytics YOLOv8:
python yolov8_region_counter.py --source "path/to/video.mp4" --save-img
```
For more options, visit the [Run Region Counting](#steps-to-run) section.
For more options, visit the [Run Region Counting](https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-Region-Counter/readme.md) section.
### Why should I use Ultralytics YOLOv8 for object counting in regions?
@ -121,7 +130,7 @@ Explore deeper benefits in the [Advantages](#advantages-of-object-counting-in-re
### Can the defined regions be adjusted during video playback?
Yes, with Ultralytics YOLOv8, regions can be interactively moved during video playback. Simply click and drag with the left mouse button to reposition the region. This feature enhances flexibility for dynamic environments. Learn more in the tip section for [movable regions](#step-2-run-region-counting-using-ultralytics-yolov8).
Yes, with Ultralytics YOLOv8, regions can be interactively moved during video playback. Simply click and drag with the left mouse button to reposition the region. This feature enhances flexibility for dynamic environments. Learn more in the tip section for [movable regions](https://github.com/ultralytics/ultralytics/blob/33cdaa5782efb2bc2b5ede945771ba647882830d/examples/YOLOv8-Region-Counter/yolov8_region_counter.py#L39).
### What are some real-world applications of object counting in regions?

@ -61,16 +61,24 @@ keywords: Ultralytics YOLO11, speed estimation, object tracking, computer vision
from ultralytics import solutions
cap = cv2.VideoCapture("Path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Video writer
video_writer = cv2.VideoWriter("speed_management.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Define speed region points
speed_region = [(20, 400), (1080, 404), (1080, 360), (20, 360)]
speed = solutions.SpeedEstimator(model="yolo11n.pt", region=speed_region, show=True)
speed = solutions.SpeedEstimator(
show=True, # Display the output
model="yolo11n-pose.pt", # Path to the YOLO11 model file.
region=speed_region, # Pass region points
# classes=[0, 2], # If you want to estimate speed of specific classes.
# line_width=2, # Adjust the line width for bounding boxes and text display
)
# Process video
while cap.isOpened():
success, im0 = cap.read()

@ -40,6 +40,12 @@ Streamlit makes it simple to build and deploy interactive web applications. Comb
!!! example "Streamlit Application"
=== "CLI"
```bash
yolo streamlit-predict
```
=== "Python"
```python
@ -50,12 +56,6 @@ Streamlit makes it simple to build and deploy interactive web applications. Comb
### Make sure to run the file using command `streamlit run <file-name.py>`
```
=== "CLI"
```bash
yolo streamlit-predict
```
This will launch the Streamlit application in your default web browser. You will see the main title, subtitle, and the sidebar with configuration options. Select your desired YOLO11 model, set the confidence and NMS thresholds, and click the "Start" button to begin the real-time object detection.
You can optionally supply a specific model in Python:
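A minimal sketch of that Python entry point (assuming the `solutions.inference()` helper and its `model` argument; save this as a script and launch it with Streamlit):

```python
from ultralytics import solutions

# Assumption: solutions.inference() is the Streamlit app entry point and accepts a model path
# Run with: streamlit run <file-name.py>
solutions.inference(model="yolo11n.pt")
```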

@ -60,40 +60,18 @@ Monitoring workouts through pose estimation with [Ultralytics YOLO11](https://gi
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
gym = solutions.AIGym(
model="yolo11n-pose.pt",
show=True,
kpts=[6, 8, 10],
)
while cap.isOpened():
success, im0 = cap.read()
if not success:
print("Video frame is empty or video processing has been successfully completed.")
break
im0 = gym.monitor(im0)
cv2.destroyAllWindows()
```
=== "Workouts Monitoring with Save Output"
```python
import cv2
from ultralytics import solutions
cap = cv2.VideoCapture("path/to/video/file.mp4")
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
# Video writer
video_writer = cv2.VideoWriter("workouts.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
# Init AIGym
gym = solutions.AIGym(
show=True,
kpts=[6, 8, 10],
show=True, # Display the frame
kpts=[6, 8, 10], # keypoints index of person for monitoring specific exercise, by default it's for pushup
model="yolo11n-pose.pt", # Path to the YOLO11 pose estimation model file
# line_width=2, # Adjust the line width for bounding boxes and text display
)
# Process video
while cap.isOpened():
success, im0 = cap.read()
if not success:

@ -1,7 +1,7 @@
---
comments: true
description: Explore Ultralytics HUB for easy training, analysis, preview, deployment and sharing of custom vision AI models using YOLOv8. Start training today!.
keywords: Ultralytics HUB, YOLOv8, custom AI models, model training, model deployment, model analysis, vision AI
description: Explore Ultralytics HUB for easy training, analysis, preview, deployment and sharing of custom vision AI models using YOLO11. Start training today!.
keywords: Ultralytics HUB, YOLO11, custom AI models, model training, model deployment, model analysis, vision AI
---
# Ultralytics HUB Models
@ -66,7 +66,7 @@ In this step, you have to choose the project in which you want to create your mo
!!! info
You can read more about the available [YOLOv8](https://docs.ultralytics.com/models/yolov8/) (and [YOLOv5](https://docs.ultralytics.com/models/yolov5/)) architectures in our documentation.
You can read more about the available [YOLO models](https://docs.ultralytics.com/models) and architectures in our documentation.
By default, your model will use a pre-trained model (trained on the [COCO](https://docs.ultralytics.com/datasets/detect/coco/) dataset) to reduce training time. You can change this behavior and tweak your model's configuration by opening the **Advanced Model Configuration** accordion.

@ -158,3 +158,42 @@ If you are interested in learning more about Albumentations, check out the follo
In this guide, we explored the key aspects of Albumentations, a great Python library for image augmentation. We discussed its wide range of transformations, optimized performance, and how you can use it in your next YOLO11 project.
Also, if you'd like to know more about other Ultralytics YOLO11 integrations, visit our [integration guide page](../integrations/index.md). You'll find valuable resources and insights there.
## FAQ
### How can I integrate Albumentations with YOLO11 for improved data augmentation?
Albumentations integrates seamlessly with YOLO11 and applies automatically during training if you have the package installed. Here's how to get started:
```python
# Install required packages
# !pip install albumentations ultralytics
from ultralytics import YOLO
# Load and train model with automatic augmentations
model = YOLO("yolo11n.pt")
model.train(data="coco8.yaml", epochs=100)
```
The integration includes optimized augmentations like blur, median blur, grayscale conversion, and CLAHE with carefully tuned probabilities to enhance model performance.
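For orientation, those transforms correspond roughly to an Albumentations pipeline like the following sketch (the exact transforms and probabilities live in `ultralytics/data/augment.py`, which this merge also touches, and may differ):

```python
import albumentations as A

# Approximate equivalent of the built-in pipeline; probabilities here are illustrative
transform = A.Compose(
    [
        A.Blur(p=0.01),
        A.MedianBlur(p=0.01),
        A.ToGray(p=0.01),
        A.CLAHE(p=0.01),
    ]
)
```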
### What are the key benefits of using Albumentations over other augmentation libraries?
Albumentations stands out for several reasons:
1. Performance: Built on OpenCV and NumPy with SIMD optimization for superior speed
2. Flexibility: Supports 70+ transformations across pixel-level, spatial-level, and mixing-level augmentations
3. Compatibility: Works seamlessly with popular frameworks like [PyTorch](../integrations/torchscript.md) and [TensorFlow](../integrations/tensorboard.md)
4. Reliability: Extensive test suite prevents silent data corruption
5. Ease of use: Single unified API for all augmentation types
### What types of computer vision tasks can benefit from Albumentations augmentation?
Albumentations enhances various [computer vision tasks](../tasks/index.md) including:
- [Object Detection](../tasks/detect.md): Improves model robustness to lighting, scale, and orientation variations
- [Instance Segmentation](../tasks/segment.md): Enhances mask prediction accuracy through diverse transformations
- [Classification](../tasks/classify.md): Increases model generalization with color and geometric augmentations
- [Pose Estimation](../tasks/pose.md): Helps models adapt to different viewpoints and lighting conditions
The library's diverse augmentation options make it valuable for any vision task requiring robust model performance.

@ -61,6 +61,8 @@ Welcome to the Ultralytics Integrations page! This page provides an overview of
- [Albumentations](albumentations.md): Enhance your Ultralytics models with powerful image augmentations to improve model robustness and generalization.
- [SONY IMX500](sony-imx500.md): Optimize and deploy [Ultralytics YOLOv8](https://docs.ultralytics.com/models/yolov8/) models on Raspberry Pi AI Cameras with the IMX500 sensor for fast, low-power performance.
## Deployment Integrations
- [CoreML](coreml.md): CoreML, developed by [Apple](https://www.apple.com/), is a framework designed for efficiently integrating machine learning models into applications across iOS, macOS, watchOS, and tvOS, using Apple's hardware for effective and secure [model deployment](https://www.ultralytics.com/glossary/model-deployment).

@ -0,0 +1,325 @@
---
comments: true
description: Learn to export Ultralytics YOLOv8 models to Sony's IMX500 format to optimize your models for efficient deployment.
keywords: Sony, IMX500, IMX 500, AITRIOS, MCT, model export, quantization, pruning, deep learning optimization, Raspberry Pi AI Camera, edge AI, PyTorch, IMX
---
# Sony IMX500 Export for Ultralytics YOLOv8
This guide covers exporting and deploying Ultralytics YOLOv8 models to Raspberry Pi AI Cameras that feature the Sony IMX500 sensor.
Deploying computer vision models on devices with limited computational power, such as [Raspberry Pi AI Camera](https://www.raspberrypi.com/products/ai-camera/), can be tricky. Using a model format optimized for faster performance makes a huge difference.
The IMX500 model format is designed to use minimal power while delivering fast performance for neural networks. It allows you to optimize your [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) models for high-speed and low-power inferencing. In this guide, we'll walk you through exporting and deploying your models to the IMX500 format while making it easier for your models to perform well on the [Raspberry Pi AI Camera](https://www.raspberrypi.com/products/ai-camera/).
<p align="center">
<img width="100%" src="https://github.com/ultralytics/assets/releases/download/v8.3.0/ai-camera.avif" alt="Raspberry Pi AI Camera">
</p>
## Why Should You Export to IMX500?
Sony's [IMX500 Intelligent Vision Sensor](https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera) is a game-changing piece of hardware in edge AI processing. It's the world's first intelligent vision sensor with on-chip AI capabilities. This sensor helps overcome many challenges in edge AI, including data processing bottlenecks, privacy concerns, and performance limitations.
While other sensors merely pass along images and frames, the IMX500 tells a whole story. It processes data directly on the sensor, allowing devices to generate insights in real-time.
## Sony's IMX500 Export for YOLOv8 Models
The IMX500 is designed to transform how devices handle data directly on the sensor, without needing to send it off to the cloud for processing.
The IMX500 works with quantized models. Quantization makes models smaller and faster without losing much [accuracy](https://www.ultralytics.com/glossary/accuracy). It is ideal for the limited resources of edge computing, allowing applications to respond quickly by reducing latency and allowing for quick data processing locally, without cloud dependency. Local processing also keeps user data private and secure since it's not sent to a remote server.
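As a rough illustration of the idea (not the exact scheme MCT applies on the IMX500), symmetric int8 quantization maps floating-point weights onto 8-bit integers using a single scale factor:
```python
import numpy as np

# Conceptual demo of symmetric int8 quantization (illustrative only)
weights = np.random.randn(6).astype(np.float32)
scale = np.abs(weights).max() / 127.0  # one scale shared by the whole tensor
q = np.clip(np.round(weights / scale), -128, 127).astype(np.int8)  # int8 storage
dequantized = q.astype(np.float32) * scale  # approximate float reconstruction
print(weights, q, dequantized, sep="\n")
```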
**IMX500 Key Features:**
- **Metadata Output:** Instead of transmitting images only, the IMX500 can output both image and metadata (inference results), or metadata only, minimizing data size, reducing bandwidth, and lowering costs.
- **Addresses Privacy Concerns:** By processing data on the device, the IMX500 addresses privacy concerns, ideal for human-centric applications like person counting and occupancy tracking.
- **Real-time Processing:** Fast, on-sensor processing supports real-time decisions, perfect for edge AI applications such as autonomous systems.
**Before You Begin:** For best results, ensure your YOLOv8 model is well-prepared for export by following our [Model Training Guide](https://docs.ultralytics.com/modes/train/), [Data Preparation Guide](https://docs.ultralytics.com/datasets/), and [Hyperparameter Tuning Guide](https://docs.ultralytics.com/guides/hyperparameter-tuning/).
## Usage Examples
Export an Ultralytics YOLOv8 model to IMX500 format and run inference with the exported model.
!!! note
Here we perform inference just to make sure the model works as expected. However, for deployment and inference on the Raspberry Pi AI Camera, please jump to the [Using IMX500 Export in Deployment](#using-imx500-export-in-deployment) section.
!!! example
=== "Python"
```python
from ultralytics import YOLO
# Load a YOLOv8n PyTorch model
model = YOLO("yolov8n.pt")
# Export the model
model.export(format="imx") # exports with PTQ quantization by default
# Load the exported model
imx_model = YOLO("yolov8n_imx_model")
# Run inference
results = imx_model("https://ultralytics.com/images/bus.jpg")
```
=== "CLI"
```bash
# Export a YOLOv8n PyTorch model to imx format with Post-Training Quantization (PTQ)
yolo export model=yolov8n.pt format=imx
# Run inference with the exported model
yolo predict model=yolov8n_imx_model source='https://ultralytics.com/images/bus.jpg'
```
The export process will create an ONNX model for quantization validation, along with a directory named `<model-name>_imx_model`. This directory will include the `packerOut.zip` file, which is essential for packaging the model for deployment on the IMX500 hardware. Additionally, the `<model-name>_imx_model` folder will contain a text file (`labels.txt`) listing all the labels associated with the model.
```bash
yolov8n_imx_model
├── dnnParams.xml
├── labels.txt
├── packerOut.zip
├── yolov8n_imx.onnx
├── yolov8n_imx500_model_MemoryReport.json
└── yolov8n_imx500_model.pbtxt
```
## Arguments
When exporting a model to IMX500 format, you can specify various arguments:
| Key | Value | Description |
| -------- | ------ | -------------------------------------------------------- |
| `format` | `imx` | Format to export to (imx) |
| `int8` | `True` | Enable INT8 quantization for the model (default: `True`) |
| `imgsz` | `640` | Image size for the model input (default: `640`) |
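For example, these arguments can be passed explicitly through the Python API; the values below simply repeat the defaults from the table:
```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
model.export(format="imx", int8=True, imgsz=640)  # defaults shown in the table above
```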
## Using IMX500 Export in Deployment
After exporting the Ultralytics YOLOv8n model to IMX500 format, it can be deployed to the Raspberry Pi AI Camera for inference.
### Hardware Prerequisites
Make sure you have the following hardware:
1. Raspberry Pi 5 or Raspberry Pi 4 Model B
2. Raspberry Pi AI Camera
Connect the Raspberry Pi AI Camera to the 15-pin MIPI CSI connector on the Raspberry Pi, then power on the Raspberry Pi.
### Software Prerequisites
!!! note
This guide has been tested with Raspberry Pi OS Bookworm running on a Raspberry Pi 5.
Step 1: Open a terminal window and execute the following commands to update the Raspberry Pi software to the latest version.
```bash
sudo apt update && sudo apt full-upgrade
```
Step 2: Install the IMX500 firmware, which is required to operate the IMX500 sensor, along with the packager tool.
```bash
sudo apt install imx500-all imx500-tools
```
Step 3: Install the prerequisites to run the `picamera2` application. We will use this application later in the deployment process.
```bash
sudo apt install python3-opencv python3-munkres
```
Step 4: Reboot the Raspberry Pi for the changes to take effect
```bash
sudo reboot
```
### Package Model and Deploy to AI Camera
After obtaining `packerOut.zip` from the IMX500 conversion process, you can pass this file into the packager tool to obtain an RPK file. This file can then be deployed directly to the AI Camera using `picamera2`.
Step 1: Package the model into RPK file
```bash
imx500-package -i <path to packerOut.zip> -o <output folder>
```
The above will generate a `network.rpk` file inside the specified output folder.
Step 2: Clone the `picamera2` repository, install it, and navigate to the imx500 examples
```bash
git clone -b next https://github.com/raspberrypi/picamera2
cd picamera2
pip install -e . --break-system-packages
cd examples/imx500
```
Step 3: Run YOLOv8 object detection using the `labels.txt` file generated during the IMX500 export.
```bash
python imx500_object_detection_demo.py --model <path to network.rpk> --fps 25 --bbox-normalization --ignore-dash-labels --bbox-order xy --labels <path to labels.txt>
```
You will then see the live inference output, as shown below
<p align="center">
<img width="100%" src="https://github.com/ultralytics/assets/releases/download/v8.3.0/imx500-inference-rpi.avif" alt="Inference on Raspberry Pi AI Camera">
</p>
## Benchmarks
The YOLOv8 benchmarks below were run by the Ultralytics team on the Raspberry Pi AI Camera with the `imx` model format, measuring speed and accuracy.
| Model | Format | Status | Size (MB) | mAP50-95(B) | Inference time (ms/im) |
| ------- | ------ | ------ | --------- | ----------- | ---------------------- |
| YOLOv8n | imx | ✅ | 2.9 | 0.522 | 66.66 |
!!! note
Validation for the above benchmark was done using the COCO8 dataset
## What's Under the Hood?
<p align="center">
<img width="640" src="https://github.com/ultralytics/assets/releases/download/v8.3.0/imx500-deploy.avif" alt="IMX500 deployment">
</p>
### Sony Model Compression Toolkit (MCT)
[Sony's Model Compression Toolkit (MCT)](https://github.com/sony/model_optimization) is a powerful tool for optimizing deep learning models through quantization and pruning. It supports various quantization methods and provides advanced algorithms to reduce model size and computational complexity without significantly sacrificing accuracy. MCT is particularly useful for deploying models on resource-constrained devices, ensuring efficient inference and reduced latency.
### Supported Features of MCT
Sony's MCT offers a range of features designed to optimize neural network models:
1. **Graph Optimizations**: Transforms models into more efficient versions by folding layers like batch normalization into preceding layers.
2. **Quantization Parameter Search**: Minimizes quantization noise using metrics like Mean-Square-Error, No-Clipping, and Mean-Average-Error.
3. **Advanced Quantization Algorithms**:
- **Shift Negative Correction**: Addresses performance issues from symmetric activation quantization.
- **Outliers Filtering**: Uses z-score to detect and remove outliers.
- **Clustering**: Utilizes non-uniform quantization grids for better distribution matching.
- **Mixed-Precision Search**: Assigns different quantization bit-widths per layer based on sensitivity.
4. **Visualization**: Use TensorBoard to observe model performance insights, quantization phases, and bit-width configurations.
#### Quantization
MCT supports several quantization methods to reduce model size and improve inference speed:
1. **Post-Training Quantization (PTQ)**:
- Available via Keras and PyTorch APIs.
- Complexity: Low
- Computational Cost: Low (CPU minutes)
2. **Gradient-based Post-Training Quantization (GPTQ)**:
- Available via Keras and PyTorch APIs.
- Complexity: Medium
- Computational Cost: Moderate (2-3 GPU hours)
3. **Quantization-Aware Training (QAT)**:
- Complexity: High
- Computational Cost: High (12-36 GPU hours)
MCT also supports various quantization schemes for weights and activations:
1. Power-of-Two (hardware-friendly)
2. Symmetric
3. Uniform
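For readers curious what a minimal PTQ call looks like, here is a hedged sketch that mirrors the MCT API used by the Ultralytics exporter elsewhere in this change set. The toy model and random calibration images are stand-ins for a real detection network and dataset.
```python
import model_compression_toolkit as mct
import torch

# Toy stand-in model; a real export quantizes the prepared YOLO detection model
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 16, 3, stride=2, padding=1),
    torch.nn.ReLU(),
    torch.nn.Conv2d(16, 32, 3, stride=2, padding=1),
)


def representative_dataset_gen(n_iter=10, imgsz=640):
    """Yield random images as calibration data (illustrative only)."""
    for _ in range(n_iter):
        yield [torch.rand(1, 3, imgsz, imgsz)]


tpc = mct.get_target_platform_capabilities(
    fw_name="pytorch", target_platform_name="imx500", target_platform_version="v1"
)
quant_model, quant_info = mct.ptq.pytorch_post_training_quantization(
    in_module=model,
    representative_data_gen=representative_dataset_gen,
    target_platform_capabilities=tpc,
)
```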
#### Structured Pruning
MCT introduces structured, hardware-aware model pruning designed for specific hardware architectures. This technique leverages the target platform's Single Instruction, Multiple Data (SIMD) capabilities by pruning SIMD groups. This reduces model size and complexity while improving channel utilization, aligning the pruning with the SIMD architecture to meet a target weights memory footprint. Available via Keras and PyTorch APIs.
### IMX500 Converter Tool (Compiler)
The IMX500 Converter Tool is integral to the IMX500 toolset, compiling models for deployment on Sony's IMX500 sensor (for instance, in Raspberry Pi AI Cameras). It takes Ultralytics YOLOv8 models processed through Ultralytics software and ensures they are compatible with, and perform efficiently on, the target hardware. Following model quantization, the export procedure generates binary files that encapsulate essential data and device-specific configurations, streamlining deployment on the Raspberry Pi AI Camera.
## Real-World Use Cases
Export to IMX500 format has wide applicability across industries. Here are some examples:
- **Edge AI and IoT**: Enable object detection on drones or security cameras, where real-time processing on low-power devices is essential.
- **Wearable Devices**: Deploy models optimized for small-scale AI processing on health-monitoring wearables.
- **Smart Cities**: Use IMX500-exported YOLOv8 models for traffic monitoring and safety analysis with faster processing and minimal latency.
- **Retail Analytics**: Enhance in-store monitoring by deploying optimized models in point-of-sale systems or smart shelves.
## Conclusion
Exporting Ultralytics YOLOv8 models to Sony's IMX500 format allows you to deploy your models for efficient inference on IMX500-based cameras. By leveraging advanced quantization techniques, you can reduce model size and improve inference speed without significantly compromising accuracy.
For more information and detailed guidelines, refer to Sony's [IMX500 website](https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera).
## FAQ
### How do I export a YOLOv8 model to IMX500 format for Raspberry Pi AI Camera?
To export a YOLOv8 model to IMX500 format, use either the Python API or CLI command:
```python
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
model.export(format="imx") # Exports with PTQ quantization by default
```
The export process will create a directory containing the necessary files for deployment, including `packerOut.zip` which can be used with the IMX500 packager tool on Raspberry Pi.
### What are the key benefits of using the IMX500 format for edge AI deployment?
The IMX500 format offers several important advantages for edge deployment:
- On-chip AI processing reduces latency and power consumption
- Outputs both image and metadata (inference result) instead of images only
- Enhanced privacy by processing data locally without cloud dependency
- Real-time processing capabilities ideal for time-sensitive applications
- Optimized quantization for efficient model deployment on resource-constrained devices
### What hardware and software prerequisites are needed for IMX500 deployment?
For deploying IMX500 models, you'll need:
Hardware:
- Raspberry Pi 5 or Raspberry Pi 4 Model B
- Raspberry Pi AI Camera with IMX500 sensor
Software:
- Raspberry Pi OS Bookworm
- IMX500 firmware and tools (`sudo apt install imx500-all imx500-tools`)
- Python packages for `picamera2` (`sudo apt install python3-opencv python3-munkres`)
### What performance can I expect from YOLOv8 models on the IMX500?
Based on Ultralytics benchmarks on Raspberry Pi AI Camera:
- YOLOv8n achieves 66.66ms inference time per image
- mAP50-95 of 0.522 on COCO8 dataset
- Model size of only 2.9MB after quantization
This demonstrates that IMX500 format provides efficient real-time inference while maintaining good accuracy for edge AI applications.
### How do I package and deploy my exported model to the Raspberry Pi AI Camera?
After exporting to IMX500 format:
1. Use the packager tool to create an RPK file:
```bash
imx500-package -i <path to packerOut.zip> -o <output folder>
```
2. Clone and install picamera2:
```bash
git clone -b next https://github.com/raspberrypi/picamera2
cd picamera2 && pip install -e . --break-system-packages
```
3. Run inference using the generated RPK file:
```bash
python imx500_object_detection_demo.py --model <path to network.rpk> --fps 25 --bbox-normalization --labels <path to labels.txt>
```

@ -14,3 +14,4 @@
| [PaddlePaddle](../integrations/paddlepaddle.md) | `paddle` | `{{ model_name or "yolo11n" }}_paddle_model/` | ✅ | `imgsz`, `batch` |
| [MNN](../integrations/mnn.md) | `mnn` | `{{ model_name or "yolo11n" }}.mnn` | ✅ | `imgsz`, `batch`, `int8`, `half` |
| [NCNN](../integrations/ncnn.md) | `ncnn` | `{{ model_name or "yolo11n" }}_ncnn_model/` | ✅ | `imgsz`, `half`, `batch` |
| [IMX500](../integrations/sony-imx500.md) | `imx` | `{{ model_name or "yolo11n" }}_imx_model/` | ✅ | `imgsz`, `int8` |

@ -13,7 +13,7 @@
| `augment` | `bool` | `False` | Enables test-time augmentation (TTA) for predictions, potentially improving detection robustness at the cost of inference speed. |
| `agnostic_nms` | `bool` | `False` | Enables class-agnostic Non-Maximum Suppression (NMS), which merges overlapping boxes of different classes. Useful in multi-class detection scenarios where class overlap is common. |
| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. |
| `retina_masks` | `bool` | `False` | Uses high-resolution segmentation masks if available in the model. This can enhance mask quality for segmentation tasks, providing finer detail. |
| `retina_masks` | `bool` | `False` | Returns high-resolution segmentation masks. The returned masks (`masks.data`) will match the original image size if enabled. If disabled, they have the image size used during inference. |
| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or [embeddings](https://www.ultralytics.com/glossary/embeddings). Useful for downstream tasks like clustering or similarity search. |
| `project` | `str` | `None` | Name of the project directory where prediction outputs are saved if `save` is enabled. |
| `name` | `str` | `None` | Name of the prediction run. Used for creating a subdirectory within the project folder, where prediction outputs are stored if `save` is enabled. |

@ -17,7 +17,6 @@
| `exist_ok` | `False` | If True, allows overwriting of an existing project/name directory. Useful for iterative experimentation without needing to manually clear previous outputs. |
| `pretrained` | `True` | Determines whether to start training from a pretrained model. Can be a boolean value or a string path to a specific model from which to load weights. Enhances training efficiency and model performance. |
| `optimizer` | `'auto'` | Choice of optimizer for training. Options include `SGD`, `Adam`, `AdamW`, `NAdam`, `RAdam`, `RMSProp` etc., or `auto` for automatic selection based on model configuration. Affects convergence speed and stability. |
| `verbose` | `False` | Enables verbose output during training, providing detailed logs and progress updates. Useful for debugging and closely monitoring the training process. |
| `seed` | `0` | Sets the random seed for training, ensuring reproducibility of results across runs with the same configurations. |
| `deterministic` | `True` | Forces deterministic algorithm use, ensuring reproducibility but may affect performance and speed due to the restriction on non-deterministic algorithms. |
| `single_cls` | `False` | Treats all classes in multi-class datasets as a single class during training. Useful for binary classification tasks or when focusing on object presence rather than classification. |

@ -12,7 +12,7 @@
| `device` | `str` | `None` | Specifies the device for validation (`cpu`, `cuda:0`, etc.). Allows flexibility in utilizing CPU or GPU resources. |
| `dnn` | `bool` | `False` | If `True`, uses the [OpenCV](https://www.ultralytics.com/glossary/opencv) DNN module for ONNX model inference, offering an alternative to [PyTorch](https://www.ultralytics.com/glossary/pytorch) inference methods. |
| `plots` | `bool` | `False` | When set to `True`, generates and saves plots of predictions versus ground truth for visual evaluation of the model's performance. |
| `rect` | `bool` | `False` | If `True`, uses rectangular inference for batching, reducing padding and potentially increasing speed and efficiency. |
| `rect` | `bool` | `True` | If `True`, uses rectangular inference for batching, reducing padding and potentially increasing speed and efficiency. |
| `split` | `str` | `val` | Determines the dataset split to use for validation (`val`, `test`, or `train`). Allows flexibility in choosing the data segment for performance evaluation. |
| `project` | `str` | `None` | Name of the project directory where validation outputs are saved. |
| `name` | `str` | `None` | Name of the validation run. Used for creating a subdirectory within the project folder, where validation logs and outputs are stored. |

@ -18,15 +18,18 @@ keywords: model benchmarking, YOLO11, Ultralytics, performance evaluation, expor
<div style="display: flex; align-items: flex-start;">
<div style="margin-right: 20px;">
<label><input type="checkbox" name="algorithm" value="YOLO11" checked><span>Ultralytics YOLO11</span></label><br>
<label><input type="checkbox" name="algorithm" value="YOLOv6" checked><span>YOLOv6</span></label><br>
<label><input type="checkbox" name="algorithm" value="YOLOv7" checked><span>YOLOv7</span></label><br>
<label><input type="checkbox" name="algorithm" value="YOLOv10" checked><span>YOLOv10</span></label><br>
<label><input type="checkbox" name="algorithm" value="YOLO11" checked><span>YOLO11</span></label><br>
<label><input type="checkbox" name="algorithm" value="YOLOv10" checked><span>YOLOv10</span></label><br>
<label><input type="checkbox" name="algorithm" value="YOLOv9" checked><span>YOLOv9</span></label><br>
<label><input type="checkbox" name="algorithm" value="YOLOv8" checked><span>Ultralytics YOLOv8</span></label><br>
<label><input type="checkbox" name="algorithm" value="PPYOLOE" checked><span>PPYOLOE</span></label><br>
<label><input type="checkbox" name="algorithm" value="YOLOv5" checked><span>Ultralytics YOLOv5</span></label>
</div>
<label><input type="checkbox" name="algorithm" value="YOLOv8" checked><span>YOLOv8</span></label><br>
<label><input type="checkbox" name="algorithm" value="YOLOv7" checked><span>YOLOv7</span></label><br>
<label><input type="checkbox" name="algorithm" value="YOLOv6-3.0" checked><span>YOLOv6-3.0</span></label><br>
<label><input type="checkbox" name="algorithm" value="YOLOv5" checked><span>YOLOv5</span></label><br>
<label><input type="checkbox" name="algorithm" value="PP-YOLOE+" checked><span>PP-YOLOE+</span></label><br>
<label><input type="checkbox" name="algorithm" value="DAMO-YOLO" checked><span>DAMO-YOLO</span></label><br>
<label><input type="checkbox" name="algorithm" value="YOLOX" checked><span>YOLOX</span></label><br>
<label><input type="checkbox" name="algorithm" value="RTDETRv2" checked><span>RTDETRv2</span></label>
</div>
<div style="flex-grow: 1;"><canvas id="chart"></canvas></div> <!-- Canvas for plotting benchmarks -->
</div>
@ -102,7 +105,7 @@ Arguments such as `model`, `data`, `imgsz`, `half`, `device`, and `verbose` prov
| `imgsz` | `640` | The input image size for the model. Can be a single integer for square images or a tuple `(width, height)` for non-square, e.g., `(640, 480)`. |
| `half` | `False` | Enables FP16 (half-precision) inference, reducing memory usage and possibly increasing speed on compatible hardware. Use `half=True` to enable. |
| `int8` | `False` | Activates INT8 quantization for further optimized performance on supported devices, especially useful for edge devices. Set `int8=True` to use. |
| `device` | `None` | Defines the computation device(s) for benchmarking, such as `"cpu"`, `"cuda:0"`, or a list of devices like `"cuda:0,1"` for multi-GPU setups. |
| `device` | `None` | Defines the computation device(s) for benchmarking, such as `"cpu"` or `"cuda:0"`. |
| `verbose` | `False` | Controls the level of detail in logging output. A boolean value; set `verbose=True` for detailed logs or a float for thresholding errors. |
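As a quick illustration of these arguments in use, here is a hedged Python sketch; the model path and device value are placeholders:
```python
from ultralytics.utils.benchmarks import benchmark

# Benchmark a model on CPU using the arguments described in the table above
benchmark(model="yolo11n.pt", imgsz=640, half=False, int8=False, device="cpu", verbose=False)
```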
## Export Formats

@ -0,0 +1,16 @@
---
description: Explore the Ultralytics Region Counter for real-time video streams. Learn about initializing parameters, counting objects in user-defined regions, and more.
keywords: Ultralytics, Region Counter, Real-time Tracking, Video Stream, Python, Object Detection
---
# Reference for `ultralytics/solutions/region_counter.py`
!!! note
This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/region_counter.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/solutions/region_counter.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/solutions/region_counter.py) 🛠. Thank you 🙏!
<br>
## ::: ultralytics.solutions.region_counter.RegionCounter
<br><br>

@ -19,6 +19,10 @@ keywords: Ultralytics, torch utils, model optimization, device selection, infere
<br><br><hr><br>
## ::: ultralytics.utils.torch_utils.FXModel
<br><br><hr><br>
## ::: ultralytics.utils.torch_utils.torch_distributed_zero_first
<br><br><hr><br>

@ -10,6 +10,9 @@
130829914+IvorZhu331@users.noreply.github.com:
avatar: https://avatars.githubusercontent.com/u/130829914?v=4
username: IvorZhu331
131249114+ServiAmirPM@users.noreply.github.com:
avatar: https://avatars.githubusercontent.com/u/131249114?v=4
username: ServiAmirPM
131261051+MatthewNoyce@users.noreply.github.com:
avatar: https://avatars.githubusercontent.com/u/131261051?v=4
username: MatthewNoyce
@ -109,6 +112,9 @@ chr043416@gmail.com:
davis.justin@mssm.org:
avatar: https://avatars.githubusercontent.com/u/23462437?v=4
username: justincdavis
francesco.mttl@gmail.com:
avatar: https://avatars.githubusercontent.com/u/3855193?v=4
username: ambitious-octopus
glenn.jocher@ultralytics.com:
avatar: https://avatars.githubusercontent.com/u/26833433?v=4
username: glenn-jocher

@ -1,4 +1,4 @@
// Apply theme based on user preference
// Light/Dark Mode -----------------------------------------------------------------------------------------------------
const applyTheme = (isDark) => {
document.body.setAttribute(
"data-md-color-scheme",
@ -12,24 +12,7 @@ const applyTheme = (isDark) => {
// Check and apply auto theme
const checkAutoTheme = () => {
const supportedLangCodes = [
"en",
"zh",
"ko",
"ja",
"ru",
"de",
"fr",
"es",
"pt",
"it",
"tr",
"vi",
"ar",
];
const langCode = window.location.pathname.split("/")[1];
const localStorageKey = `${supportedLangCodes.includes(langCode) ? `/${langCode}` : ""}/.__palette`;
const palette = JSON.parse(localStorage.getItem(localStorageKey) || "{}");
const palette = JSON.parse(localStorage.getItem(".__palette") || "{}");
if (palette.index === 0) {
applyTheme(window.matchMedia("(prefers-color-scheme: dark)").matches);
@ -47,45 +30,51 @@ checkAutoTheme();
document.addEventListener("DOMContentLoaded", () => {
const autoThemeInput = document.getElementById("__palette_1");
autoThemeInput?.addEventListener("click", () => {
if (autoThemeInput.checked) {
setTimeout(checkAutoTheme);
}
if (autoThemeInput.checked) setTimeout(checkAutoTheme);
});
});
// Iframe navigation
window.onhashchange = () => {
window.parent.postMessage(
{
type: "navigation",
hash:
window.location.pathname +
window.location.search +
window.location.hash,
},
"*",
);
};
// Add Inkeep button
// Inkeep --------------------------------------------------------------------------------------------------------------
document.addEventListener("DOMContentLoaded", () => {
const enableSearchBar = true;
const inkeepScript = document.createElement("script");
inkeepScript.src = "https://unpkg.com/@inkeep/uikit-js@0.3.11/dist/embed.js";
inkeepScript.src = "https://unpkg.com/@inkeep/uikit-js@0.3.18/dist/embed.js";
inkeepScript.type = "module";
inkeepScript.defer = true;
document.head.appendChild(inkeepScript);
// Configure and initialize the widget
const addInkeepWidget = () => {
if (enableSearchBar) {
const containerDiv = document.createElement("div");
containerDiv.style.transform = "scale(0.7)";
containerDiv.style.transformOrigin = "left center";
const inkeepDiv = document.createElement("div");
inkeepDiv.id = "inkeepSearchBar";
containerDiv.appendChild(inkeepDiv);
const headerElement = document.querySelector(".md-header__inner");
const searchContainer = headerElement.querySelector(".md-header__source");
if (headerElement && searchContainer) {
headerElement.insertBefore(containerDiv, searchContainer);
}
}
// configure and initialize the widget
const addInkeepWidget = (componentType, targetElementId) => {
const inkeepWidget = Inkeep().embed({
componentType: "ChatButton",
componentType,
...(componentType !== "ChatButton"
? { targetElement: targetElementId }
: {}),
colorModeSync: {
observedElement: document.documentElement,
isDarkModeCallback: (el) => {
const currentTheme = el.getAttribute("data-color-mode");
return currentTheme === "dark";
},
colorModeAttribute: "data-color-mode",
colorModeAttribute: "data-color-mode-scheme",
},
properties: {
chatButtonType: "PILL",
@ -101,13 +90,12 @@ document.addEventListener("DOMContentLoaded", () => {
theme: {
stylesheetUrls: ["/stylesheets/style.css"],
},
// ...optional settings
},
modalSettings: {
// optional settings
},
searchSettings: {
// optional settings
placeholder: "Search",
},
aiChatSettings: {
chatSubjectName: "Ultralytics",
@ -146,101 +134,206 @@ document.addEventListener("DOMContentLoaded", () => {
});
};
inkeepScript.addEventListener("load", () => {
addInkeepWidget(); // initialize the widget
const widgetContainer = document.getElementById("inkeepSearchBar");
addInkeepWidget("ChatButton");
widgetContainer && addInkeepWidget("SearchBar", "#inkeepSearchBar");
});
});
// This object contains the benchmark data for various object detection models.
// YOLO models chart ---------------------------------------------------------------------------------------------------
const data = {
'YOLOv5': {s: {speed: 1.92, mAP: 37.4}, m: {speed: 4.03, mAP: 45.4}, l: {speed: 6.61, mAP: 49.0}, x: {speed: 11.89, mAP: 50.7}},
'YOLOv6': {n: {speed: 1.17, mAP: 37.5}, s: {speed: 2.66, mAP: 45.0}, m: {speed: 5.28, mAP: 50.0}, l: {speed: 8.95, mAP: 52.8}},
'YOLOv7': {l: {speed: 6.84, mAP: 51.4}, x: {speed: 11.57, mAP: 53.1}},
'YOLOv8': {n: {speed: 1.47, mAP: 37.3}, s: {speed: 2.66, mAP: 44.9}, m: {speed: 5.86, mAP: 50.2}, l: {speed: 9.06, mAP: 52.9}, x: {speed: 14.37, mAP: 53.9}},
'YOLOv9': {t: {speed: 2.30, mAP: 37.8}, s: {speed: 3.54, mAP: 46.5}, m: {speed: 6.43, mAP: 51.5}, c: {speed: 7.16, mAP: 52.8}, e: {speed: 16.77, mAP: 55.1}},
'YOLOv10': {n: {speed: 1.56, mAP: 39.5}, s: {speed: 2.66, mAP: 46.7}, m: {speed: 5.48, mAP: 51.3}, b: {speed: 6.54, mAP: 52.7}, l: {speed: 8.33, mAP: 53.3}, x: {speed: 12.2, mAP: 54.4}},
'PPYOLOE': {t: {speed: 2.84, mAP: 39.9}, s: {speed: 2.62, mAP: 43.7}, m: {speed: 5.56, mAP: 49.8}, l: {speed: 8.36, mAP: 52.9}, x: {speed: 14.3, mAP: 54.7}},
'YOLO11': {n: {speed: 1.55, mAP: 39.5}, s: {speed: 2.63, mAP: 47.0}, m: {speed: 5.27, mAP: 51.4}, l: {speed: 6.84, mAP: 53.2}, x: {speed: 12.49, mAP: 54.7}}
YOLO11: {
n: { speed: 1.55, mAP: 39.5 },
s: { speed: 2.63, mAP: 47.0 },
m: { speed: 5.27, mAP: 51.4 },
l: { speed: 6.84, mAP: 53.2 },
x: { speed: 12.49, mAP: 54.7 },
},
YOLOv10: {
n: { speed: 1.56, mAP: 39.5 },
s: { speed: 2.66, mAP: 46.7 },
m: { speed: 5.48, mAP: 51.3 },
b: { speed: 6.54, mAP: 52.7 },
l: { speed: 8.33, mAP: 53.3 },
x: { speed: 12.2, mAP: 54.4 },
},
YOLOv9: {
t: { speed: 2.3, mAP: 37.8 },
s: { speed: 3.54, mAP: 46.5 },
m: { speed: 6.43, mAP: 51.5 },
c: { speed: 7.16, mAP: 52.8 },
e: { speed: 16.77, mAP: 55.1 },
},
YOLOv8: {
n: { speed: 1.47, mAP: 37.3 },
s: { speed: 2.66, mAP: 44.9 },
m: { speed: 5.86, mAP: 50.2 },
l: { speed: 9.06, mAP: 52.9 },
x: { speed: 14.37, mAP: 53.9 },
},
YOLOv7: { l: { speed: 6.84, mAP: 51.4 }, x: { speed: 11.57, mAP: 53.1 } },
"YOLOv6-3.0": {
n: { speed: 1.17, mAP: 37.5 },
s: { speed: 2.66, mAP: 45.0 },
m: { speed: 5.28, mAP: 50.0 },
l: { speed: 8.95, mAP: 52.8 },
},
YOLOv5: {
s: { speed: 1.92, mAP: 37.4 },
m: { speed: 4.03, mAP: 45.4 },
l: { speed: 6.61, mAP: 49.0 },
x: { speed: 11.89, mAP: 50.7 },
},
"PP-YOLOE+": {
t: { speed: 2.84, mAP: 39.9 },
s: { speed: 2.62, mAP: 43.7 },
m: { speed: 5.56, mAP: 49.8 },
l: { speed: 8.36, mAP: 52.9 },
x: { speed: 14.3, mAP: 54.7 },
},
"DAMO-YOLO": {
t: { speed: 2.32, mAP: 42.0 },
s: { speed: 3.45, mAP: 46.0 },
m: { speed: 5.09, mAP: 49.2 },
l: { speed: 7.18, mAP: 50.8 },
},
YOLOX: {
s: { speed: 2.56, mAP: 40.5 },
m: { speed: 5.43, mAP: 46.9 },
l: { speed: 9.04, mAP: 49.7 },
x: { speed: 16.1, mAP: 51.1 },
},
RTDETRv2: {
s: { speed: 5.03, mAP: 48.1 },
m: { speed: 7.51, mAP: 51.9 },
l: { speed: 9.76, mAP: 53.4 },
x: { speed: 15.03, mAP: 54.3 },
},
};
let chart = null; // chart variable will hold the reference to the current chart instance.
let chart = null; // chart variable will hold the reference to the current chart instance.
// Function to lighten a hex color by a specified amount.
function lightenHexColor(color, amount = 0.5) {
const r = parseInt(color.slice(1, 3), 16);
const g = parseInt(color.slice(3, 5), 16);
const b = parseInt(color.slice(5, 7), 16);
const newR = Math.min(255, Math.round(r + (255 - r) * amount));
const newG = Math.min(255, Math.round(g + (255 - g) * amount));
const newB = Math.min(255, Math.round(b + (255 - b) * amount));
return `#${newR.toString(16).padStart(2, "0")}${newG.toString(16).padStart(2, "0")}${newB.toString(16).padStart(2, "0")}`;
}
// This function is responsible for updating the benchmarks chart.
// Function to update the benchmarks chart.
function updateChart() {
// If a chart instance already exists, destroy it.
if (chart) {
chart.destroy();
}
if (chart) {
chart.destroy();
} // If a chart instance already exists, destroy it.
// Get the selected algorithms from the checkboxes.
const selectedAlgorithms = [...document.querySelectorAll('input[name="algorithm"]:checked')].map(e => e.value);
// Create the datasets for the selected algorithms.
const datasets = selectedAlgorithms.map((algorithm, index) => ({
label: algorithm, // Label for the data points in the legend.
data: Object.entries(data[algorithm]).map(([version, point]) => ({
x: point.speed, // Speed data points on the x-axis.
y: point.mAP, // mAP data points on the y-axis.
version: version.toUpperCase() // Store the version as additional data.
})),
fill: false, // Don't fill the chart.
borderColor: `hsl(${index * 90}, 70%, 50%)`, // Assign a unique color to each dataset.
tension: 0.3, // Smooth the line.
pointRadius: 5, // Increase the dot size.
pointHoverRadius: 10, // Increase the dot size on hover.
borderWidth: 2 // Set the line thickness.
}));
// If there are no selected algorithms, return without creating a new chart.
if (datasets.length === 0) {
return;
}
// Define a specific color map for models.
const colorMap = {
YOLO11: "#0b23a9",
YOLOv10: "#ff7f0e",
YOLOv9: "#2ca02c",
YOLOv8: "#d62728",
YOLOv7: "#9467bd",
"YOLOv6-3.0": "#8c564b",
YOLOv5: "#e377c2",
"PP-YOLOE+": "#7f7f7f",
"DAMO-YOLO": "#bcbd22",
YOLOX: "#17becf",
RTDETRv2: "#eccd22",
};
// Get the selected algorithms from the checkboxes.
const selectedAlgorithms = [
...document.querySelectorAll('input[name="algorithm"]:checked'),
].map((e) => e.value);
// Create a new chart instance.
chart = new Chart(document.getElementById('chart').getContext('2d'), {
type: 'line', // Set the chart type to line.
data: { datasets },
options: {
plugins: {
legend: { display: true, position: 'top', labels: {color: '#808080'} }, // Configure the legend.
tooltip: {
callbacks: {
label: (tooltipItem) => {
const { dataset, dataIndex } = tooltipItem;
const point = dataset.data[dataIndex];
return `${dataset.label}${point.version.toLowerCase()}: Speed = ${point.x}, mAP = ${point.y}`; // Custom tooltip label.
}
},
mode: 'nearest',
intersect: false
} // Configure the tooltip.
// Create the datasets for the selected algorithms.
const datasets = selectedAlgorithms.map((algorithm, i) => {
const baseColor =
colorMap[algorithm] || `hsl(${Math.random() * 360}, 70%, 50%)`;
const lineColor = i === 0 ? baseColor : lightenHexColor(baseColor, 0.6); // Lighten non-primary lines.
return {
label: algorithm, // Label for the data points in the legend.
data: Object.entries(data[algorithm]).map(([version, point]) => ({
x: point.speed, // Speed data points on the x-axis.
y: point.mAP, // mAP data points on the y-axis.
version: version.toUpperCase(), // Store the version as additional data.
})),
fill: false, // Don't fill the chart.
borderColor: lineColor, // Use the lightened color for the line.
tension: 0.3, // Smooth the line.
pointRadius: i === 0 ? 7 : 4, // Highlight primary dataset points.
pointHoverRadius: i === 0 ? 9 : 6, // Highlight hover for primary dataset.
pointBackgroundColor: lineColor, // Fill points with the line color.
pointBorderColor: "#ffffff", // Add a border around points for contrast.
borderWidth: i === 0 ? 3 : 1.5, // Slightly increase line size for the primary dataset.
};
});
if (datasets.length === 0) {
return;
} // If there are no selected algorithms, return without creating a new chart.
// Create a new chart instance.
chart = new Chart(document.getElementById("chart").getContext("2d"), {
type: "line", // Set the chart type to line.
data: { datasets },
options: {
plugins: {
legend: {
display: true,
position: "top",
labels: { color: "#808080" },
}, // Configure the legend.
tooltip: {
callbacks: {
label: (tooltipItem) => {
const { dataset, dataIndex } = tooltipItem;
const point = dataset.data[dataIndex];
return `${dataset.label}${point.version.toLowerCase()}: Speed = ${point.x}, mAP = ${point.y}`; // Custom tooltip label.
},
interaction: { mode: 'nearest', axis: 'x', intersect: false }, // Configure the interaction mode.
scales: {
x: {
type: 'linear', position: 'bottom',
title: { display: true, text: 'Latency T4 TensorRT10 FP16 (ms/img)', color: '#808080'}, // X-axis title.
grid: { color: '#e0e0e0' }, // Grid line color.
ticks: { color: '#808080' } // Tick label color.
},
y: {
title: { display: true, text: 'mAP', color: '#808080'}, // Y-axis title.
grid: { color: '#e0e0e0' }, // Grid line color.
ticks: { color: '#808080' } // Tick label color.
}
}
}
});
},
mode: "nearest",
intersect: false,
}, // Configure the tooltip.
},
interaction: { mode: "nearest", axis: "x", intersect: false }, // Configure the interaction mode.
scales: {
x: {
type: "linear",
position: "bottom",
title: {
display: true,
text: "Latency T4 TensorRT10 FP16 (ms/img)",
color: "#808080",
}, // X-axis title.
grid: { color: "#e0e0e0" }, // Grid line color.
ticks: { color: "#808080" }, // Tick label color.
},
y: {
title: { display: true, text: "mAP", color: "#808080" }, // Y-axis title.
grid: { color: "#e0e0e0" }, // Grid line color.
ticks: { color: "#808080" }, // Tick label color.
},
},
},
});
}
// Poll for Chart.js to load, then initialize checkboxes and chart
function initializeApp() {
if (typeof Chart !== 'undefined') {
document.querySelectorAll('input[name="algorithm"]').forEach(checkbox =>
checkbox.addEventListener('change', updateChart)
);
updateChart();
} else {
setTimeout(initializeApp, 100); // Retry every 100ms
}
if (typeof Chart !== "undefined") {
document
.querySelectorAll('input[name="algorithm"]')
.forEach((checkbox) => checkbox.addEventListener("change", updateChart));
updateChart();
} else {
setTimeout(initializeApp, 100); // Retry every 100ms
}
}
document.addEventListener("DOMContentLoaded", initializeApp); // Initial chart rendering on page load

@ -57,14 +57,17 @@ function setupGiscusLoader() {
const giscusContainer = document.getElementById("giscus-container");
if (giscusContainer) {
const observer = new IntersectionObserver((entries) => {
entries.forEach((entry) => {
if (entry.isIntersecting) {
loadGiscus();
observer.unobserve(entry.target);
}
});
}, { threshold: 0.1 }); // Trigger when 10% of the element is visible
const observer = new IntersectionObserver(
(entries) => {
entries.forEach((entry) => {
if (entry.isIntersecting) {
loadGiscus();
observer.unobserve(entry.target);
}
});
},
{ threshold: 0.1 },
); // Trigger when 10% of the element is visible
observer.observe(giscusContainer);
}

@ -265,8 +265,15 @@ div.highlight {
}
/* MkDocs Ultralytics Plugin ---------------------------------------------------------------------------------------- */
/* Inkeep button font color ----------------------------------------------------------------------------------------- */
/* Inkeep ----------------------------------------------------------------------------------------------------------- */
.ikp-floating-button {
color: #111f68;
}
/* Inkeep button ---------------------------------------------------------------------------------------------------- */
#inkeepSearchBar {
transition: all 0.2s ease-in-out;
}
#inkeepSearchBar:hover {
transform: scale(1.1);
filter: brightness(1.2);
}
/* Inkeep ----------------------------------------------------------------------------------------------------------- */

@ -291,6 +291,7 @@ nav:
- COCO8-pose: datasets/pose/coco8-pose.md
- Tiger-pose: datasets/pose/tiger-pose.md
- Hand-keypoints: datasets/pose/hand-keypoints.md
- Dog-pose: datasets/pose/dog-pose.md
- Classification:
- datasets/classify/index.md
- Caltech 101: datasets/classify/caltech101.md
@ -412,12 +413,14 @@ nav:
- TF.js: integrations/tfjs.md
- TFLite: integrations/tflite.md
- TFLite Edge TPU: integrations/edge-tpu.md
- Sony IMX500: integrations/sony-imx500.md
- TensorBoard: integrations/tensorboard.md
- TensorRT: integrations/tensorrt.md
- TorchScript: integrations/torchscript.md
- VS Code: integrations/vscode.md
- Weights & Biases: integrations/weights-biases.md
- Albumentations: integrations/albumentations.md
- SONY IMX500: integrations/sony-imx500.md
- HUB:
- hub/index.md
- Web:
@ -559,7 +562,6 @@ nav:
- utils: reference/nn/modules/utils.md
- tasks: reference/nn/tasks.md
- solutions:
- solutions: reference/solutions/solutions.md
- ai_gym: reference/solutions/ai_gym.md
- analytics: reference/solutions/analytics.md
- distance_calculation: reference/solutions/distance_calculation.md
@ -567,8 +569,10 @@ nav:
- object_counter: reference/solutions/object_counter.md
- parking_management: reference/solutions/parking_management.md
- queue_management: reference/solutions/queue_management.md
- solutions: reference/solutions/solutions.md
- speed_estimation: reference/solutions/speed_estimation.md
- streamlit_inference: reference/solutions/streamlit_inference.md
- region_counter: reference/solutions/region_counter.md
- trackers:
- basetrack: reference/trackers/basetrack.md
- bot_sort: reference/trackers/bot_sort.md
@ -624,8 +628,8 @@ nav:
# Plugins including 301 redirects navigation ---------------------------------------------------------------------------
plugins:
- macros
- search:
lang: en
# - search:
# lang: en
- mkdocstrings:
enabled: true
default_handler: python

@ -205,3 +205,12 @@ def test_export_ncnn():
"""Test YOLO exports to NCNN format."""
file = YOLO(MODEL).export(format="ncnn", imgsz=32)
YOLO(file)(SOURCE, imgsz=32) # exported model inference
@pytest.mark.skipif(True, reason="Test disabled as keras and tensorflow version conflicts with tflite export.")
@pytest.mark.skipif(not LINUX or MACOS, reason="Skipping test on Windows and Macos")
def test_export_imx():
"""Test YOLOv8n exports to IMX format."""
model = YOLO("yolov8n.pt")
file = model.export(format="imx", imgsz=32)
YOLO(file)(SOURCE, imgsz=32)

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
__version__ = "8.3.28"
__version__ = "8.3.34"
import os

@ -671,6 +671,9 @@ def handle_yolo_solutions(args: List[str]) -> None:
)
s_n = "count" # Default solution if none provided
if args and args[0] == "help":  # return early if the user calls `yolo solutions help`
return
cls, method = SOLUTION_MAP[s_n] # solution class name, method name and default source
from ultralytics import solutions # import ultralytics solutions

@ -0,0 +1,23 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Dogs dataset http://vision.stanford.edu/aditya86/ImageNetDogs/ by Stanford
# Documentation: https://docs.ultralytics.com/datasets/pose/dog-pose/
# Example usage: yolo train data=dog-pose.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── dog-pose ← downloads here (337 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/dog-pose # dataset root dir
train: train # train images (relative to 'path') 6773 images
val: val # val images (relative to 'path') 1703 images
# Keypoints
kpt_shape: [24, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
# Classes
names:
0: dog
# Download script/URL (optional)
download: https://github.com/ultralytics/assets/releases/download/v0.0.0/dog-pose.zip

@ -2280,7 +2280,7 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
Args:
dataset (Dataset): The dataset object containing image data and annotations.
imgsz (int): The target image size for resizing.
hyp (Dict): A dictionary of hyperparameters controlling various aspects of the transformations.
hyp (Namespace): A namespace of hyperparameters controlling various aspects of the transformations.
stretch (bool): If True, applies stretching to the image. If False, uses LetterBox resizing.
Returns:
@ -2288,8 +2288,9 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
Examples:
>>> from ultralytics.data.dataset import YOLODataset
>>> from ultralytics.utils import IterableSimpleNamespace
>>> dataset = YOLODataset(img_path="path/to/images", imgsz=640)
>>> hyp = {"mosaic": 1.0, "copy_paste": 0.5, "degrees": 10.0, "translate": 0.2, "scale": 0.9}
>>> hyp = IterableSimpleNamespace(mosaic=1.0, copy_paste=0.5, degrees=10.0, translate=0.2, scale=0.9)
>>> transforms = v8_transforms(dataset, imgsz=640, hyp=hyp)
>>> augmented_data = transforms(dataset[0])
"""

@ -577,7 +577,7 @@ def merge_multi_segment(segments):
return s
def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt", device=None):
"""
Converts existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB)
in YOLO format. Generates segmentation data using SAM auto-annotator as needed.
@ -587,6 +587,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
save_dir (str | Path): Path to save the generated labels, labels will be saved
into `labels-segment` in the same directory level of `im_dir` if save_dir is None. Default: None.
sam_model (str): Segmentation model to use for intermediate segmentation data; optional.
device (int | str): The specific device to run SAM models on. Default: None.
Notes:
The input directory structure assumed for dataset:
@ -621,7 +622,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
boxes[:, [0, 2]] *= w
boxes[:, [1, 3]] *= h
im = cv2.imread(label["im_file"])
sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False)
sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False, device=device)
label["segments"] = sam_results[0].masks.xyn
save_dir = Path(save_dir) if save_dir else Path(im_dir).parent / "labels-segment"

@ -18,6 +18,7 @@ TensorFlow.js | `tfjs` | yolo11n_web_model/
PaddlePaddle | `paddle` | yolo11n_paddle_model/
MNN | `mnn` | yolo11n.mnn
NCNN | `ncnn` | yolo11n_ncnn_model/
IMX | `imx` | yolo11n_imx_model/
Requirements:
$ pip install "ultralytics[export]"
@ -44,6 +45,7 @@ Inference:
yolo11n_paddle_model # PaddlePaddle
yolo11n.mnn # MNN
yolo11n_ncnn_model # NCNN
yolo11n_imx_model # IMX
TensorFlow.js:
$ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
@ -77,7 +79,6 @@ from ultralytics.utils import (
ARM64,
DEFAULT_CFG,
IS_JETSON,
IS_RASPBERRYPI,
LINUX,
LOGGER,
MACOS,
@ -94,7 +95,7 @@ from ultralytics.utils.checks import check_imgsz, check_is_path_safe, check_requ
from ultralytics.utils.downloads import attempt_download_asset, get_github_assets, safe_download
from ultralytics.utils.files import file_size, spaces_in_path
from ultralytics.utils.ops import Profile
from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device, smart_inference_mode
from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device
def export_formats():
@ -114,6 +115,7 @@ def export_formats():
["PaddlePaddle", "paddle", "_paddle_model", True, True],
["MNN", "mnn", ".mnn", True, True],
["NCNN", "ncnn", "_ncnn_model", True, True],
["IMX", "imx", "_imx_model", True, True],
]
return dict(zip(["Format", "Argument", "Suffix", "CPU", "GPU"], zip(*x)))
@ -171,7 +173,6 @@ class Exporter:
self.callbacks = _callbacks or callbacks.get_default_callbacks()
callbacks.add_integration_callbacks(self)
@smart_inference_mode()
def __call__(self, model=None) -> str:
"""Returns list of exported files/dirs after running callbacks."""
self.run_callbacks("on_export_start")
@ -194,9 +195,22 @@ class Exporter:
flags = [x == fmt for x in fmts]
if sum(flags) != 1:
raise ValueError(f"Invalid export format='{fmt}'. Valid formats are {fmts}")
jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, mnn, ncnn = (
flags # export booleans
)
(
jit,
onnx,
xml,
engine,
coreml,
saved_model,
pb,
tflite,
edgetpu,
tfjs,
paddle,
mnn,
ncnn,
imx,
) = flags # export booleans
is_tf_format = any((saved_model, pb, tflite, edgetpu, tfjs))
# Device
@ -210,6 +224,9 @@ class Exporter:
self.device = select_device("cpu" if self.args.device is None else self.args.device)
# Checks
if imx and not self.args.int8:
LOGGER.warning("WARNING ⚠ IMX only supports int8 export, setting int8=True.")
self.args.int8 = True
if not hasattr(model, "names"):
model.names = default_class_names()
model.names = check_class_names(model.names)
@ -247,8 +264,7 @@ class Exporter:
"WARNING ⚠ INT8 export requires a missing 'data' arg for calibration. "
f"Using default 'data={self.args.data}'."
)
if mnn and (IS_RASPBERRYPI or IS_JETSON):
raise SystemError("MNN export not supported on Raspberry Pi and NVIDIA Jetson")
# Input
im = torch.zeros(self.args.batch, 3, *self.imgsz).to(self.device)
file = Path(
@ -264,6 +280,11 @@ class Exporter:
model.eval()
model.float()
model = model.fuse()
if imx:
from ultralytics.utils.torch_utils import FXModel
model = FXModel(model)
for m in model.modules():
if isinstance(m, (Detect, RTDETRDecoder)): # includes all Detect subclasses like Segment, Pose, OBB
m.dynamic = self.args.dynamic
@ -273,6 +294,15 @@ class Exporter:
elif isinstance(m, C2f) and not is_tf_format:
# EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph
m.forward = m.forward_split
if isinstance(m, Detect) and imx:
from ultralytics.utils.tal import make_anchors
m.anchors, m.strides = (
x.transpose(0, 1)
for x in make_anchors(
torch.cat([s / m.stride.unsqueeze(-1) for s in self.imgsz], dim=1), m.stride, 0.5
)
)
y = None
for _ in range(2):
@ -347,6 +377,8 @@ class Exporter:
f[11], _ = self.export_mnn()
if ncnn: # NCNN
f[12], _ = self.export_ncnn()
if imx:
f[13], _ = self.export_imx()
# Finish
f = [str(x) for x in f if x] # filter out '' and None
@ -1068,6 +1100,137 @@ class Exporter:
yaml_save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml
return f, None
@try_export
def export_imx(self, prefix=colorstr("IMX:")):
"""YOLO IMX export."""
gptq = False
assert LINUX, "export only supported on Linux. See https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera/documentation/imx500-converter"
if getattr(self.model, "end2end", False):
raise ValueError("IMX export is not supported for end2end models.")
if "C2f" not in self.model.__str__():
raise ValueError("IMX export is only supported for YOLOv8 detection models")
check_requirements(("model-compression-toolkit==2.1.1", "sony-custom-layers==0.2.0", "tensorflow==2.12.0"))
check_requirements("imx500-converter[pt]==3.14.3") # Separate requirements for imx500-converter
import model_compression_toolkit as mct
import onnx
from sony_custom_layers.pytorch.object_detection.nms import multiclass_nms
try:
out = subprocess.run(
["java", "--version"], check=True, capture_output=True
) # Java 17 is required for imx500-converter
if "openjdk 17" not in str(out.stdout):
raise FileNotFoundError
except FileNotFoundError:
subprocess.run(["sudo", "apt", "install", "-y", "openjdk-17-jdk", "openjdk-17-jre"], check=True)
def representative_dataset_gen(dataloader=self.get_int8_calibration_dataloader(prefix)):
for batch in dataloader:
img = batch["img"]
img = img / 255.0
yield [img]
tpc = mct.get_target_platform_capabilities(
fw_name="pytorch", target_platform_name="imx500", target_platform_version="v1"
)
config = mct.core.CoreConfig(
mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=10),
quantization_config=mct.core.QuantizationConfig(concat_threshold_update=True),
)
resource_utilization = mct.core.ResourceUtilization(weights_memory=3146176 * 0.76)
quant_model = (
mct.gptq.pytorch_gradient_post_training_quantization( # Perform Gradient-Based Post Training Quantization
model=self.model,
representative_data_gen=representative_dataset_gen,
target_resource_utilization=resource_utilization,
gptq_config=mct.gptq.get_pytorch_gptq_config(n_epochs=1000, use_hessian_based_weights=False),
core_config=config,
target_platform_capabilities=tpc,
)[0]
if gptq
else mct.ptq.pytorch_post_training_quantization( # Perform post training quantization
in_module=self.model,
representative_data_gen=representative_dataset_gen,
target_resource_utilization=resource_utilization,
core_config=config,
target_platform_capabilities=tpc,
)[0]
)
class NMSWrapper(torch.nn.Module):
def __init__(
self,
model: torch.nn.Module,
score_threshold: float = 0.001,
iou_threshold: float = 0.7,
max_detections: int = 300,
):
"""
Wrapping PyTorch Module with multiclass_nms layer from sony_custom_layers.
Args:
model (nn.Module): Model instance.
score_threshold (float): Score threshold for non-maximum suppression.
iou_threshold (float): Intersection over union threshold for non-maximum suppression.
max_detections (int): The maximum number of detections to return.
"""
super().__init__()
self.model = model
self.score_threshold = score_threshold
self.iou_threshold = iou_threshold
self.max_detections = max_detections
def forward(self, images):
# model inference
outputs = self.model(images)
boxes = outputs[0]
scores = outputs[1]
nms = multiclass_nms(
boxes=boxes,
scores=scores,
score_threshold=self.score_threshold,
iou_threshold=self.iou_threshold,
max_detections=self.max_detections,
)
return nms
quant_model = NMSWrapper(
model=quant_model,
score_threshold=self.args.conf or 0.001,
iou_threshold=self.args.iou,
max_detections=self.args.max_det,
).to(self.device)
f = Path(str(self.file).replace(self.file.suffix, "_imx_model"))
f.mkdir(exist_ok=True)
onnx_model = f / Path(str(self.file).replace(self.file.suffix, "_imx.onnx")) # js dir
mct.exporter.pytorch_export_model(
model=quant_model, save_model_path=onnx_model, repr_dataset=representative_dataset_gen
)
model_onnx = onnx.load(onnx_model) # load onnx model
for k, v in self.metadata.items():
meta = model_onnx.metadata_props.add()
meta.key, meta.value = k, str(v)
onnx.save(model_onnx, onnx_model)
subprocess.run(
["imxconv-pt", "-i", str(onnx_model), "-o", str(f), "--no-input-persistency", "--overwrite-output"],
check=True,
)
# Needed for imx models.
with open(f / "labels.txt", "w") as file:
file.writelines([f"{name}\n" for _, name in self.model.names.items()])
return f, None
def _add_tflite_metadata(self, file):
"""Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata."""
import flatbuffers

@ -2,7 +2,7 @@
import inspect
from pathlib import Path
from typing import List, Union
from typing import Dict, List, Union
import numpy as np
import torch
@ -881,7 +881,7 @@ class Model(nn.Module):
return self
@property
def names(self) -> list:
def names(self) -> Dict[int, str]:
"""
Retrieves the class names associated with the loaded model.

@ -535,9 +535,9 @@ class Results(SimpleClass):
# Plot Detect results
if pred_boxes is not None and show_boxes:
for i, d in enumerate(reversed(pred_boxes)):
c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
c, d_conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
name = ("" if id is None else f"id:{id} ") + names[c]
label = (f"{name} {conf:.2f}" if conf else name) if labels else None
label = (f"{name} {d_conf:.2f}" if conf else name) if labels else None
box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze()
annotator.box_label(
box,
@ -750,7 +750,7 @@ class Results(SimpleClass):
save_one_box(
d.xyxy,
self.orig_img.copy(),
file=Path(save_dir) / self.names[int(d.cls)] / f"{Path(file_name)}.jpg",
file=Path(save_dir) / self.names[int(d.cls)] / Path(file_name).with_suffix(".jpg"),
BGR=True,
)

@ -279,12 +279,7 @@ class BaseTrainer:
# Batch size
if self.batch_size < 1 and RANK == -1: # single-GPU only, estimate best batch size
self.args.batch = self.batch_size = check_train_batch_size(
model=self.model,
imgsz=self.args.imgsz,
amp=self.amp,
batch=self.batch_size,
)
self.args.batch = self.batch_size = self.auto_batch()
# Dataloaders
batch_size = self.batch_size // max(world_size, 1)
@ -478,6 +473,16 @@ class BaseTrainer:
self._clear_memory()
self.run_callbacks("teardown")
def auto_batch(self, max_num_obj=0):
"""Get batch size by calculating memory occupation of model."""
return check_train_batch_size(
model=self.model,
imgsz=self.args.imgsz,
amp=self.amp,
batch=self.batch_size,
max_num_obj=max_num_obj,
) # returns batch size
def _get_memory(self):
"""Get accelerator memory utilization in GB."""
if self.device.type == "mps":

@ -64,6 +64,9 @@ class FastSAMPredictor(SegmentationPredictor):
if not isinstance(results, list):
results = [results]
for result in results:
if len(result) == 0:
prompt_results.append(result)
continue
masks = result.masks.data
if masks.shape[1:] != result.orig_shape:
masks = scale_masks(masks[None], result.orig_shape)[0]

@ -68,8 +68,11 @@ class RTDETRTrainer(DetectionTrainer):
hyp=self.args,
rect=False,
cache=self.args.cache or None,
single_cls=self.args.single_cls or False,
prefix=colorstr(f"{mode}: "),
classes=self.args.classes,
data=self.data,
fraction=self.args.fraction if mode == "train" else 1.0,
)
def get_validator(self):

@ -141,3 +141,10 @@ class DetectionTrainer(BaseTrainer):
boxes = np.concatenate([lb["bboxes"] for lb in self.train_loader.dataset.labels], 0)
cls = np.concatenate([lb["cls"] for lb in self.train_loader.dataset.labels], 0)
plot_labels(boxes, cls.squeeze(), names=self.data["names"], save_dir=self.save_dir, on_plot=self.on_plot)
def auto_batch(self):
"""Get batch size by calculating memory occupation of model."""
train_dataset = self.build_dataset(self.trainset, mode="train", batch=16)
# 4 for mosaic augmentation
max_num_obj = max(len(l["cls"]) for l in train_dataset.labels) * 4
return super().auto_batch(max_num_obj)
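Reviewer note: `DetectionTrainer.auto_batch` now feeds AutoBatch an upper bound on targets per image: the densest label file in the training set, multiplied by 4 because mosaic stitches four images together. A rough sketch of the estimate, with the label count purely illustrative:

```python
# Hypothetical numbers for illustration only.
densest_image_labels = 60                 # max(len(l["cls"]) for l in train_dataset.labels)
max_num_obj = densest_image_labels * 4    # mosaic can combine up to 4 images -> ~240 targets per image
# This value flows through auto_batch -> check_train_batch_size -> autobatch -> profile,
# where a dummy prediction-sized tensor is allocated so the memory fit reflects training, not just inference.
```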

@ -155,8 +155,8 @@ class DetectionValidator(BaseValidator):
# Evaluate
if nl:
stat["tp"] = self._process_batch(predn, bbox, cls)
if self.args.plots:
self.confusion_matrix.process_batch(predn, bbox, cls)
if self.args.plots:
self.confusion_matrix.process_batch(predn, bbox, cls)
for k in self.stats.keys():
self.stats[k].append(stat[k])

@ -138,8 +138,8 @@ class PoseValidator(DetectionValidator):
if nl:
stat["tp"] = self._process_batch(predn, bbox, cls)
stat["tp_p"] = self._process_batch(predn, bbox, cls, pred_kpts, pbatch["kpts"])
if self.args.plots:
self.confusion_matrix.process_batch(predn, bbox, cls)
if self.args.plots:
self.confusion_matrix.process_batch(predn, bbox, cls)
for k in self.stats.keys():
self.stats[k].append(stat[k])

@ -135,8 +135,8 @@ class SegmentationValidator(DetectionValidator):
stat["tp_m"] = self._process_batch(
predn, bbox, cls, pred_masks, gt_masks, self.args.overlap_mask, masks=True
)
if self.args.plots:
self.confusion_matrix.process_batch(predn, bbox, cls)
if self.args.plots:
self.confusion_matrix.process_batch(predn, bbox, cls)
for k in self.stats.keys():
self.stats[k].append(stat[k])

@ -123,6 +123,7 @@ class AutoBackend(nn.Module):
paddle,
mnn,
ncnn,
imx,
triton,
) = self._model_type(w)
fp16 &= pt or jit or onnx or xml or engine or nn_module or triton # FP16
@ -182,8 +183,8 @@ class AutoBackend(nn.Module):
check_requirements("opencv-python>=4.5.4")
net = cv2.dnn.readNetFromONNX(w)
# ONNX Runtime
elif onnx:
# ONNX Runtime and IMX
elif onnx or imx:
LOGGER.info(f"Loading {w} for ONNX Runtime inference...")
check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
if IS_RASPBERRYPI or IS_JETSON:
@ -199,7 +200,22 @@ class AutoBackend(nn.Module):
device = torch.device("cpu")
cuda = False
LOGGER.info(f"Preferring ONNX Runtime {providers[0]}")
session = onnxruntime.InferenceSession(w, providers=providers)
if onnx:
session = onnxruntime.InferenceSession(w, providers=providers)
else:
check_requirements(
["model-compression-toolkit==2.1.1", "sony-custom-layers[torch]==0.2.0", "onnxruntime-extensions"]
)
w = next(Path(w).glob("*.onnx"))
LOGGER.info(f"Loading {w} for ONNX IMX inference...")
import mct_quantizers as mctq
from sony_custom_layers.pytorch.object_detection import nms_ort # noqa
session = onnxruntime.InferenceSession(
w, mctq.get_ort_session_options(), providers=["CPUExecutionProvider"]
)
task = "detect"
output_names = [x.name for x in session.get_outputs()]
metadata = session.get_modelmeta().custom_metadata_map
dynamic = isinstance(session.get_outputs()[0].shape[0], str)
@ -520,7 +536,7 @@ class AutoBackend(nn.Module):
y = self.net.forward()
# ONNX Runtime
elif self.onnx:
elif self.onnx or self.imx:
if self.dynamic:
im = im.cpu().numpy() # torch to numpy
y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
@ -537,6 +553,9 @@ class AutoBackend(nn.Module):
)
self.session.run_with_iobinding(self.io)
y = self.bindings
if self.imx:
# boxes, conf, cls
y = np.concatenate([y[0], y[1][:, :, None], y[2][:, :, None]], axis=-1)
# OpenVINO
elif self.xml:
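Reviewer note: IMX inference reuses ONNX Runtime with `mct_quantizers` session options and the Sony NMS custom op, and the three post-NMS outputs (boxes, scores, class ids) are concatenated back into one detection array. A small numpy check of that concatenation, with shapes assumed from the NMSWrapper above (max_det = 300):

```python
import numpy as np

boxes = np.zeros((1, 300, 4), dtype=np.float32)   # xyxy from the NMS custom op
scores = np.zeros((1, 300), dtype=np.float32)     # per-detection confidence
classes = np.zeros((1, 300), dtype=np.float32)    # per-detection class index

# Same concatenation as AutoBackend: -> (1, 300, 6) = [x1, y1, x2, y2, conf, cls]
y = np.concatenate([boxes, scores[:, :, None], classes[:, :, None]], axis=-1)
print(y.shape)  # (1, 300, 6)
```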

@ -240,7 +240,8 @@ class C2f(nn.Module):
def forward_split(self, x):
"""Forward pass using split() instead of chunk()."""
y = list(self.cv1(x).split((self.c, self.c), 1))
y = self.cv1(x).split((self.c, self.c), 1)
y = [y[0], y[1]]
y.extend(m(y[-1]) for m in self.m)
return self.cv2(torch.cat(y, 1))

@ -23,6 +23,7 @@ class Detect(nn.Module):
dynamic = False # force grid reconstruction
export = False # export mode
format = None # export format
end2end = False # end2end
max_det = 300 # max_det
shape = None
@ -101,7 +102,7 @@ class Detect(nn.Module):
# Inference path
shape = x[0].shape # BCHW
x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
if self.dynamic or self.shape != shape:
if self.format != "imx" and (self.dynamic or self.shape != shape):
self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
self.shape = shape
@ -119,6 +120,11 @@ class Detect(nn.Module):
grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
norm = self.strides / (self.stride[0] * grid_size)
dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
elif self.export and self.format == "imx":
dbox = self.decode_bboxes(
self.dfl(box) * self.strides, self.anchors.unsqueeze(0) * self.strides, xywh=False
)
return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1)
else:
dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
@ -137,9 +143,9 @@ class Detect(nn.Module):
a[-1].bias.data[:] = 1.0 # box
b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img)
def decode_bboxes(self, bboxes, anchors):
def decode_bboxes(self, bboxes, anchors, xywh=True):
"""Decode bounding boxes."""
return dist2bbox(bboxes, anchors, xywh=not self.end2end, dim=1)
return dist2bbox(bboxes, anchors, xywh=xywh and (not self.end2end), dim=1)
@staticmethod
def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80):

@ -7,6 +7,7 @@ from .heatmap import Heatmap
from .object_counter import ObjectCounter
from .parking_management import ParkingManagement, ParkingPtsSelection
from .queue_management import QueueManager
from .region_counter import RegionCounter
from .speed_estimation import SpeedEstimator
from .streamlit_inference import inference
@ -21,4 +22,5 @@ __all__ = (
"SpeedEstimator",
"Analytics",
"inference",
"RegionCounter",
)

@ -54,7 +54,7 @@ class Analytics(BaseSolution):
self.y_label = "Total Counts"
# Predefined data
self.bg_color = "#00F344" # background color of frame
self.bg_color = "#F3F3F3" # background color of frame
self.fg_color = "#111E68" # foreground color of frame
self.title = "Ultralytics Solutions" # window name
self.max_points = 45 # maximum points to be drawn on window

@ -104,12 +104,12 @@ class Heatmap(ObjectCounter):
self.annotator.draw_region(reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2)
self.store_tracking_history(track_id, box) # Store track history
self.store_classwise_counts(cls) # store classwise counts in dict
current_centroid = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)
# Store tracking previous position and perform object counting
prev_position = None
if len(self.track_history[track_id]) > 1:
prev_position = self.track_history[track_id][-2]
self.count_objects(self.track_line, box, track_id, prev_position, cls) # Perform object counting
self.count_objects(current_centroid, track_id, prev_position, cls) # Perform object counting
if self.region is not None:
self.display_counts(im0) # Display the counts on the frame

@ -46,13 +46,12 @@ class ObjectCounter(BaseSolution):
self.show_in = self.CFG["show_in"]
self.show_out = self.CFG["show_out"]
def count_objects(self, track_line, box, track_id, prev_position, cls):
def count_objects(self, current_centroid, track_id, prev_position, cls):
"""
Counts objects within a polygonal or linear region based on their tracks.
Args:
track_line (Dict): Last 30 frame track record for the object.
box (List[float]): Bounding box coordinates [x1, y1, x2, y2] for the specific track in the current frame.
current_centroid (Tuple[float, float]): Centroid (x, y) of the tracked object in the current frame.
track_id (int): Unique identifier for the tracked object.
prev_position (Tuple[float, float]): Last frame position coordinates (x, y) of the track.
cls (int): Class index for classwise count updates.
@ -64,34 +63,55 @@ class ObjectCounter(BaseSolution):
>>> track_id = 1
>>> prev_position = (120, 220)
>>> cls = 0
>>> counter.count_objects(track_line, box, track_id, prev_position, cls)
>>> counter.count_objects(current_centroid, track_id, prev_position, cls)
"""
if prev_position is None or track_id in self.counted_ids:
return
centroid = self.r_s.centroid
dx = (box[0] - prev_position[0]) * (centroid.x - prev_position[0])
dy = (box[1] - prev_position[1]) * (centroid.y - prev_position[1])
if len(self.region) >= 3 and self.r_s.contains(self.Point(track_line[-1])):
self.counted_ids.append(track_id)
# For polygon region
if dx > 0:
self.in_count += 1
self.classwise_counts[self.names[cls]]["IN"] += 1
else:
self.out_count += 1
self.classwise_counts[self.names[cls]]["OUT"] += 1
elif len(self.region) < 3 and self.LineString([prev_position, box[:2]]).intersects(self.r_s):
self.counted_ids.append(track_id)
# For linear region
if dx > 0 and dy > 0:
self.in_count += 1
self.classwise_counts[self.names[cls]]["IN"] += 1
else:
self.out_count += 1
self.classwise_counts[self.names[cls]]["OUT"] += 1
if len(self.region) == 2: # Linear region (defined as a line segment)
line = self.LineString(self.region) # Check if the line intersects the trajectory of the object
if line.intersects(self.LineString([prev_position, current_centroid])):
# Determine orientation of the region (vertical or horizontal)
if abs(self.region[0][0] - self.region[1][0]) < abs(self.region[0][1] - self.region[1][1]):
# Vertical region: Compare x-coordinates to determine direction
if current_centroid[0] > prev_position[0]: # Moving right
self.in_count += 1
self.classwise_counts[self.names[cls]]["IN"] += 1
else: # Moving left
self.out_count += 1
self.classwise_counts[self.names[cls]]["OUT"] += 1
else:
# Horizontal region: Compare y-coordinates to determine direction
if current_centroid[1] > prev_position[1]: # Moving downward
self.in_count += 1
self.classwise_counts[self.names[cls]]["IN"] += 1
else: # Moving upward
self.out_count += 1
self.classwise_counts[self.names[cls]]["OUT"] += 1
self.counted_ids.append(track_id)
elif len(self.region) > 2: # Polygonal region
polygon = self.Polygon(self.region)
if polygon.contains(self.Point(current_centroid)):
# Determine motion direction for vertical or horizontal polygons
region_width = max([p[0] for p in self.region]) - min([p[0] for p in self.region])
region_height = max([p[1] for p in self.region]) - min([p[1] for p in self.region])
if region_width < region_height: # Vertical-oriented polygon
if current_centroid[0] > prev_position[0]: # Moving right
self.in_count += 1
self.classwise_counts[self.names[cls]]["IN"] += 1
else: # Moving left
self.out_count += 1
self.classwise_counts[self.names[cls]]["OUT"] += 1
else: # Horizontal-oriented polygon
if current_centroid[1] > prev_position[1]: # Moving downward
self.in_count += 1
self.classwise_counts[self.names[cls]]["IN"] += 1
else: # Moving upward
self.out_count += 1
self.classwise_counts[self.names[cls]]["OUT"] += 1
self.counted_ids.append(track_id)
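Reviewer note: the rewritten counter first classifies a two-point region by orientation: if the segment spans more vertically than horizontally (|x0 − x1| < |y0 − y1|), a crossing is judged by horizontal movement (right = IN, left = OUT); otherwise by vertical movement (down = IN, up = OUT). Polygonal regions apply the same rule using the polygon's bounding extent. A standalone sketch of just that decision, using hypothetical coordinates:

```python
def crossing_direction(region, prev, curr):
    """Return 'IN' or 'OUT' using the same orientation rule as count_objects above."""
    if len(region) == 2:  # line region
        vertical = abs(region[0][0] - region[1][0]) < abs(region[0][1] - region[1][1])
    else:  # polygon region: compare bounding-box width vs height
        xs, ys = [p[0] for p in region], [p[1] for p in region]
        vertical = (max(xs) - min(xs)) < (max(ys) - min(ys))
    if vertical:
        return "IN" if curr[0] > prev[0] else "OUT"  # moving right vs left
    return "IN" if curr[1] > prev[1] else "OUT"      # moving down vs up

print(crossing_direction([(640, 0), (640, 720)], prev=(600, 360), curr=(660, 360)))  # IN (moved right)
```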
def store_classwise_counts(self, cls):
"""
@ -174,12 +194,12 @@ class ObjectCounter(BaseSolution):
self.annotator.draw_centroid_and_tracks(
self.track_line, color=colors(int(cls), True), track_thickness=self.line_width
)
current_centroid = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)
# store previous position of track for object counting
prev_position = None
if len(self.track_history[track_id]) > 1:
prev_position = self.track_history[track_id][-2]
self.count_objects(self.track_line, box, track_id, prev_position, cls) # Perform object counting
self.count_objects(current_centroid, track_id, prev_position, cls) # Perform object counting
self.display_counts(im0) # Display the counts on the frame
self.display_output(im0) # display output with base class function

@ -0,0 +1,112 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from ultralytics.solutions.solutions import BaseSolution
from ultralytics.utils.plotting import Annotator, colors
class RegionCounter(BaseSolution):
"""
A class designed for real-time counting of objects within user-defined regions in a video stream.
This class inherits from `BaseSolution` and offers functionalities to define polygonal regions in a video
frame, track objects, and count those objects that pass through each defined region. This makes it useful
for applications that require counting in specified areas, such as monitoring zones or segmented sections.
Attributes:
region_template (dict): A template for creating new counting regions with default attributes including
the name, polygon coordinates, and display colors.
counting_regions (list): A list storing all defined regions, where each entry is based on `region_template`
and includes specific region settings like name, coordinates, and color.
Methods:
add_region: Adds a new counting region with specified attributes, such as the region's name, polygon points,
region color, and text color.
count: Processes video frames to count objects in each region, drawing regions and displaying counts
on the frame. Handles object detection, region definition, and containment checks.
"""
def __init__(self, **kwargs):
"""Initializes the RegionCounter class for real-time counting in different regions of the video streams."""
super().__init__(**kwargs)
self.region_template = {
"name": "Default Region",
"polygon": None,
"counts": 0,
"dragging": False,
"region_color": (255, 255, 255),
"text_color": (0, 0, 0),
}
self.counting_regions = []
def add_region(self, name, polygon_points, region_color, text_color):
"""
Adds a new region to the counting list based on the provided template with specific attributes.
Args:
name (str): Name assigned to the new region.
polygon_points (list[tuple]): List of (x, y) coordinates defining the region's polygon.
region_color (tuple): BGR color for region visualization.
text_color (tuple): BGR color for the text within the region.
"""
region = self.region_template.copy()
region.update(
{
"name": name,
"polygon": self.Polygon(polygon_points),
"region_color": region_color,
"text_color": text_color,
}
)
self.counting_regions.append(region)
def count(self, im0):
"""
Processes the input frame to detect and count objects within each defined region.
Args:
im0 (numpy.ndarray): Input image frame where objects and regions are annotated.
Returns:
im0 (numpy.ndarray): Processed image frame with annotated counting information.
"""
self.annotator = Annotator(im0, line_width=self.line_width)
self.extract_tracks(im0)
# Region initialization and conversion
if self.region is None:
self.initialize_region()
regions = {"Region#01": self.region}
else:
regions = self.region if isinstance(self.region, dict) else {"Region#01": self.region}
# Draw regions and process counts for each defined area
for idx, (region_name, reg_pts) in enumerate(regions.items(), start=1):
color = colors(idx, True)
self.annotator.draw_region(reg_pts=reg_pts, color=color, thickness=self.line_width * 2)
self.add_region(region_name, reg_pts, color, self.annotator.get_txt_color())
# Prepare regions for containment check
for region in self.counting_regions:
region["prepared_polygon"] = self.prep(region["polygon"])
# Process bounding boxes and count objects within each region
for box, cls in zip(self.boxes, self.clss):
self.annotator.box_label(box, label=self.names[cls], color=colors(cls, True))
bbox_center = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)
for region in self.counting_regions:
if region["prepared_polygon"].contains(self.Point(bbox_center)):
region["counts"] += 1
# Display counts in each region
for region in self.counting_regions:
self.annotator.text_label(
region["polygon"].bounds,
label=str(region["counts"]),
color=region["region_color"],
txt_color=region["text_color"],
)
region["counts"] = 0 # Reset count for next frame
self.display_output(im0)
return im0
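Reviewer note: a minimal usage sketch for the new RegionCounter solution, assuming the standard solutions kwargs (`model`, `region`, `show`) and that `region` may be a dict mapping region names to polygon points, as `count()` above accepts; the file name and coordinates are illustrative:

```python
import cv2
from ultralytics import solutions

regions = {  # name -> polygon points (illustrative coordinates)
    "Region#01": [(50, 60), (250, 60), (250, 300), (50, 300)],
    "Region#02": [(300, 60), (500, 60), (500, 300), (300, 300)],
}

counter = solutions.RegionCounter(model="yolo11n.pt", region=regions, show=True)

cap = cv2.VideoCapture("path/to/video.mp4")
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    frame = counter.count(frame)  # annotates regions and per-region counts on the frame
cap.release()
cv2.destroyAllWindows()
```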

@ -50,10 +50,12 @@ class BaseSolution:
"""
check_requirements("shapely>=2.0.0")
from shapely.geometry import LineString, Point, Polygon
from shapely.prepared import prep
self.LineString = LineString
self.Polygon = Polygon
self.Point = Point
self.prep = prep
# Load config and update with args
DEFAULT_SOL_DICT.update(kwargs)

@ -11,7 +11,7 @@ from ultralytics.utils import DEFAULT_CFG, LOGGER, colorstr
from ultralytics.utils.torch_utils import autocast, profile
def check_train_batch_size(model, imgsz=640, amp=True, batch=-1):
def check_train_batch_size(model, imgsz=640, amp=True, batch=-1, max_num_obj=1):
"""
Compute optimal YOLO training batch size using the autobatch() function.
@ -20,6 +20,7 @@ def check_train_batch_size(model, imgsz=640, amp=True, batch=-1):
imgsz (int, optional): Image size used for training.
amp (bool, optional): Use automatic mixed precision if True.
batch (float, optional): Fraction of GPU memory to use. If -1, use default.
max_num_obj (int, optional): The maximum number of objects expected in a single image of the dataset.
Returns:
(int): Optimal batch size computed using the autobatch() function.
@ -29,10 +30,12 @@ def check_train_batch_size(model, imgsz=640, amp=True, batch=-1):
Otherwise, a default fraction of 0.6 is used.
"""
with autocast(enabled=amp):
return autobatch(deepcopy(model).train(), imgsz, fraction=batch if 0.0 < batch < 1.0 else 0.6)
return autobatch(
deepcopy(model).train(), imgsz, fraction=batch if 0.0 < batch < 1.0 else 0.6, max_num_obj=max_num_obj
)
def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch):
def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch, max_num_obj=1):
"""
Automatically estimate the best YOLO batch size to use a fraction of the available CUDA memory.
@ -41,6 +44,7 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch):
imgsz (int, optional): The image size used as input for the YOLO model. Defaults to 640.
fraction (float, optional): The fraction of available CUDA memory to use. Defaults to 0.60.
batch_size (int, optional): The default batch size to use if an error is detected. Defaults to 16.
max_num_obj (int, optional): The maximum number of objects expected in a single image of the dataset.
Returns:
(int): The optimal batch size.
@ -70,7 +74,7 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch):
batch_sizes = [1, 2, 4, 8, 16] if t < 16 else [1, 2, 4, 8, 16, 32, 64]
try:
img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes]
results = profile(img, model, n=1, device=device)
results = profile(img, model, n=1, device=device, max_num_obj=max_num_obj)
# Fit a solution
y = [x[2] for x in results if x] # memory [2]

@ -114,10 +114,13 @@ def benchmark(
assert LINUX or MACOS, "Windows Paddle exports not supported yet"
if i == 12: # MNN
assert not isinstance(model, YOLOWorld), "YOLOWorldv2 MNN exports not supported yet"
assert not IS_RASPBERRYPI, "MNN export not supported on Raspberry Pi"
assert not IS_JETSON, "MNN export not supported on NVIDIA Jetson"
if i == 13: # NCNN
assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet"
if i == 14: # IMX
assert not is_end2end
assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported"
assert model.task == "detect", "IMX only supported for detection task"
assert "C2f" in model.__str__(), "IMX only supported for YOLOv8"
if "cpu" in device.type:
assert cpu, "inference not supported on CPU"
if "cuda" in device.type:

@ -16,8 +16,7 @@ def on_fit_epoch_end(trainer):
"""Sends training metrics to Ray Tune at end of each epoch."""
if ray.train._internal.session._get_session(): # replacement for deprecated ray.tune.is_session_enabled()
metrics = trainer.metrics
metrics["epoch"] = trainer.epoch
session.report(metrics)
session.report({**metrics, **{"epoch": trainer.epoch + 1}})
callbacks = (

@ -109,7 +109,12 @@ def _log_plots(plots, step):
def on_pretrain_routine_start(trainer):
"""Initiate and start project if module is present."""
wb.run or wb.init(project=trainer.args.project or "Ultralytics", name=trainer.args.name, config=vars(trainer.args))
if not wb.run:
wb.init(
project=str(trainer.args.project).replace("/", "-") if trainer.args.project else "Ultralytics",
name=str(trainer.args.name).replace("/", "-"),
config=vars(trainer.args),
)
def on_fit_epoch_end(trainer):
@ -138,7 +143,7 @@ def on_train_end(trainer):
art.add_file(trainer.best)
wb.run.log_artifact(art, aliases=["best"])
# Check if we actually have plots to save
if trainer.args.plots:
if trainer.args.plots and hasattr(trainer.validator.metrics, "curves_results"):
for curve_name, curve_values in zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results):
x, y, x_title, y_title = curve_values
_plot_curve(

@ -3,6 +3,7 @@
import torch
import torch.nn as nn
from . import LOGGER
from .checks import check_version
from .metrics import bbox_iou, probiou
from .ops import xywhr2xyxyxyxy
@ -58,17 +59,46 @@ class TaskAlignedAssigner(nn.Module):
"""
self.bs = pd_scores.shape[0]
self.n_max_boxes = gt_bboxes.shape[1]
device = gt_bboxes.device
if self.n_max_boxes == 0:
device = gt_bboxes.device
return (
torch.full_like(pd_scores[..., 0], self.bg_idx).to(device),
torch.zeros_like(pd_bboxes).to(device),
torch.zeros_like(pd_scores).to(device),
torch.zeros_like(pd_scores[..., 0]).to(device),
torch.zeros_like(pd_scores[..., 0]).to(device),
torch.full_like(pd_scores[..., 0], self.bg_idx),
torch.zeros_like(pd_bboxes),
torch.zeros_like(pd_scores),
torch.zeros_like(pd_scores[..., 0]),
torch.zeros_like(pd_scores[..., 0]),
)
try:
return self._forward(pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt)
except torch.OutOfMemoryError:
# Move tensors to CPU, compute, then move back to original device
LOGGER.warning("WARNING: CUDA OutOfMemoryError in TaskAlignedAssigner, using CPU")
cpu_tensors = [t.cpu() for t in (pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt)]
result = self._forward(*cpu_tensors)
return tuple(t.to(device) for t in result)
def _forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt):
"""
Compute the task-aligned assignment. Reference code is available at
https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py.
Args:
pd_scores (Tensor): shape(bs, num_total_anchors, num_classes)
pd_bboxes (Tensor): shape(bs, num_total_anchors, 4)
anc_points (Tensor): shape(num_total_anchors, 2)
gt_labels (Tensor): shape(bs, n_max_boxes, 1)
gt_bboxes (Tensor): shape(bs, n_max_boxes, 4)
mask_gt (Tensor): shape(bs, n_max_boxes, 1)
Returns:
target_labels (Tensor): shape(bs, num_total_anchors)
target_bboxes (Tensor): shape(bs, num_total_anchors, 4)
target_scores (Tensor): shape(bs, num_total_anchors, num_classes)
fg_mask (Tensor): shape(bs, num_total_anchors)
target_gt_idx (Tensor): shape(bs, num_total_anchors)
"""
mask_pos, align_metric, overlaps = self.get_pos_mask(
pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt
)
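Reviewer note: the assigner now wraps its heavy computation in a try/except so that a CUDA out-of-memory error triggers a one-off retry on the CPU, with results moved back to the original device. The same pattern in isolation, with a hypothetical helper name (`torch.cuda.OutOfMemoryError` is used here for broader torch compatibility):

```python
import torch

def run_with_cpu_fallback(fn, *tensors):
    """Run fn on the tensors' device; on CUDA OOM, retry on CPU and move results back."""
    device = tensors[0].device
    try:
        return fn(*tensors)
    except torch.cuda.OutOfMemoryError:
        cpu_result = fn(*[t.cpu() for t in tensors])      # assumes fn returns a tuple of tensors
        return tuple(r.to(device) for r in cpu_result)
```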
@ -306,7 +336,7 @@ def make_anchors(feats, strides, grid_cell_offset=0.5):
assert feats is not None
dtype, device = feats[0].dtype, feats[0].device
for i, stride in enumerate(strides):
_, _, h, w = feats[i].shape
h, w = feats[i].shape[2:] if isinstance(feats, list) else (int(feats[i][0]), int(feats[i][1]))
sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x
sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y
sy, sx = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_10 else torch.meshgrid(sy, sx)

@ -623,7 +623,7 @@ def convert_optimizer_state_dict_to_fp16(state_dict):
return state_dict
def profile(input, ops, n=10, device=None):
def profile(input, ops, n=10, device=None, max_num_obj=0):
"""
Ultralytics speed, memory and FLOPs profiler.
@ -671,6 +671,14 @@ def profile(input, ops, n=10, device=None):
t[2] = float("nan")
tf += (t[1] - t[0]) * 1000 / n # ms per op forward
tb += (t[2] - t[1]) * 1000 / n # ms per op backward
if max_num_obj: # simulate training with predictions per image grid (for AutoBatch)
torch.randn(
x.shape[0],
max_num_obj,
int(sum([(x.shape[-1] / s) * (x.shape[-2] / s) for s in m.stride.tolist()])),
device=device,
dtype=torch.float32,
)
mem = torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0 # (GB)
s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else "list" for x in (x, y)) # shapes
p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters
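Reviewer note: the dummy tensor in `profile()` simulates the per-image assignment buffers used during training, with one row per potential target (`max_num_obj`) and one column per anchor location summed over the detection strides. For a 640x640 input and the usual strides of 8/16/32 that is 80² + 40² + 20² = 8400 columns. A quick check of the arithmetic, with illustrative numbers:

```python
imgsz = 640
strides = (8, 16, 32)                        # typical YOLO detect strides (assumption)
anchors = sum((imgsz // s) ** 2 for s in strides)
print(anchors)                               # 8400 = 6400 + 1600 + 400
# With max_num_obj = 240, profile() allocates an extra (batch, 240, 8400) float32 tensor
# per measured batch size, so the fitted memory curve reflects training-time pressure.
```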
@ -729,3 +737,48 @@ class EarlyStopping:
f"i.e. `patience=300` or use `patience=0` to disable EarlyStopping."
)
return stop
class FXModel(nn.Module):
"""
A custom model class for torch.fx compatibility.
This class extends `torch.nn.Module` and is designed to ensure compatibility with torch.fx for tracing and graph manipulation.
It copies attributes from an existing model and explicitly sets the model attribute to ensure proper copying.
Args:
model (torch.nn.Module): The original model to wrap for torch.fx compatibility.
"""
def __init__(self, model):
"""
Initialize the FXModel.
Args:
model (torch.nn.Module): The original model to wrap for torch.fx compatibility.
"""
super().__init__()
copy_attr(self, model)
# Explicitly set `model` since `copy_attr` somehow does not copy it.
self.model = model.model
def forward(self, x):
"""
Forward pass through the model.
This method performs the forward pass through the model, handling the dependencies between layers and saving intermediate outputs.
Args:
x (torch.Tensor): The input tensor to the model.
Returns:
(torch.Tensor): The output tensor from the model.
"""
y = [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
# from earlier layers
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]
x = m(x) # run
y.append(x) # save output
return x
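Reviewer note: `FXModel` flattens the YOLO module list into a tracing-friendly wrapper: attributes are copied, and `forward()` resolves each layer's `f` (from) indices by hand instead of relying on the parent model's logic. A small sketch of wrapping a detection model, assuming the wrapper is consumed by an fx-based tool (in this PR, the IMX quantization path) rather than called directly in production:

```python
import torch
from ultralytics import YOLO
from ultralytics.utils.torch_utils import FXModel

base = YOLO("yolov8n.pt").model.eval()   # underlying DetectionModel (nn.Module)
fx_ready = FXModel(base)                 # copies attrs, exposes base.model as a flat layer list

with torch.no_grad():
    out = fx_ready(torch.zeros(1, 3, 640, 640))  # forward pass matches the original model's outputs
```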
