mct-2.1.1
Laughing-q 4 weeks ago
commit 0a0129cd7d
  1. 3
      .github/workflows/ci.yaml
  2. 7
      .github/workflows/docker.yaml
  3. 4
      .github/workflows/stale.yml
  4. 3
      README.md
  5. 3
      README.zh-CN.md
  6. 6
      docker/Dockerfile
  7. 29
      docs/en/datasets/detect/open-images-v7.md
  8. 8
      docs/en/datasets/index.md
  9. 249
      docs/en/guides/nvidia-jetson.md
  10. 1
      docs/en/macros/export-args.md
  11. 2
      docs/en/macros/predict-args.md
  12. 2
      docs/en/macros/validation-args.md
  13. 2
      docs/en/models/index.md
  14. 28
      docs/en/models/sam-2.md
  15. 6
      docs/en/models/yolo11.md
  16. 6
      docs/en/models/yolov5.md
  17. 4
      docs/en/models/yolov8.md
  18. 4
      docs/en/modes/export.md
  19. 2
      docs/en/modes/predict.md
  20. 6
      docs/en/tasks/obb.md
  21. 17
      examples/heatmaps.ipynb
  22. 22
      examples/object_counting.ipynb
  23. 2
      examples/tutorial.ipynb
  24. 2
      mkdocs.yml
  25. 3
      pyproject.toml
  26. 2
      tests/test_cuda.py
  27. 2
      ultralytics/__init__.py
  28. 6
      ultralytics/cfg/__init__.py
  29. 15
      ultralytics/cfg/solutions/default.yaml
  30. 3
      ultralytics/data/converter.py
  31. 33
      ultralytics/engine/exporter.py
  32. 8
      ultralytics/models/sam/build.py
  33. 147
      ultralytics/models/sam/modules/sam.py
  34. 2
      ultralytics/models/sam/predict.py
  35. 5
      ultralytics/nn/autobackend.py
  36. 1
      ultralytics/solutions/parking_management.py
  37. 18
      ultralytics/solutions/solutions.py
  38. 14
      ultralytics/utils/__init__.py
  39. 43
      ultralytics/utils/callbacks/comet.py
  40. 24
      ultralytics/utils/callbacks/wb.py
  41. 10
      ultralytics/utils/checks.py

@ -39,7 +39,8 @@ on:
jobs:
HUB:
if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && github.event.inputs.hub == 'true'))
# if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && github.event.inputs.hub == 'true'))
if: github.repository == 'ultralytics/ultralytics' && 'workflow_dispatch' && github.event.inputs.hub == 'true'
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false

@ -84,11 +84,8 @@ jobs:
outputs:
new_release: ${{ steps.check_tag.outputs.new_release }}
steps:
- name: Cleanup disk
# Free up to 30GB of disk space per https://github.com/ultralytics/ultralytics/pull/15848
uses: jlumbroso/free-disk-space@v1.3.1
with:
tool-cache: true
- name: Cleanup disk space
uses: ultralytics/actions/cleanup-disk@main
- name: Checkout repo
uses: actions/checkout@v4

@ -5,6 +5,10 @@ on:
schedule:
- cron: "0 0 * * *" # Runs at 00:00 UTC every day
permissions:
pull-requests: write
issues: write
jobs:
stale:
runs-on: ubuntu-latest

@ -17,6 +17,7 @@
<a href="https://console.paperspace.com/github/ultralytics/ultralytics"><img src="https://assets.paperspace.io/img/gradient-badge.svg" alt="Run Ultralytics on Gradient"></a>
<a href="https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open Ultralytics In Colab"></a>
<a href="https://www.kaggle.com/models/ultralytics/yolo11"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open Ultralytics In Kaggle"></a>
<a href="https://mybinder.org/v2/gh/ultralytics/ultralytics/HEAD?labpath=examples%2Ftutorial.ipynb"><img src="https://mybinder.org/badge_logo.svg" alt="Open Ultralytics In Binder"></a>
</div>
<br>
@ -26,7 +27,7 @@ We hope that the resources here will help you get the most out of YOLO. Please b
To request an Enterprise License please complete the form at [Ultralytics Licensing](https://www.ultralytics.com/license).
<img width="100%" src="https://github.com/user-attachments/assets/a311a4ed-bbf2-43b5-8012-5f183a28a845" alt="YOLO11 performance plots"></a>
<img width="100%" src="https://raw.githubusercontent.com/ultralytics/assets/refs/heads/main/yolo/performance-comparison.png" alt="YOLO11 performance plots"></a>
<div align="center">
<a href="https://github.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-github.png" width="2%" alt="Ultralytics GitHub"></a>

@ -17,6 +17,7 @@
<a href="https://console.paperspace.com/github/ultralytics/ultralytics"><img src="https://assets.paperspace.io/img/gradient-badge.svg" alt="Run Ultralytics on Gradient"></a>
<a href="https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open Ultralytics In Colab"></a>
<a href="https://www.kaggle.com/models/ultralytics/yolo11"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open Ultralytics In Kaggle"></a>
<a href="https://mybinder.org/v2/gh/ultralytics/ultralytics/HEAD?labpath=examples%2Ftutorial.ipynb"><img src="https://mybinder.org/badge_logo.svg" alt="Open Ultralytics In Binder"></a>
</div>
<br>
@ -26,7 +27,7 @@
想申请企业许可证,请完成 [Ultralytics Licensing](https://www.ultralytics.com/license) 上的表单。
<img width="100%" src="https://github.com/user-attachments/assets/a311a4ed-bbf2-43b5-8012-5f183a28a845" alt="YOLO11 performance plots"></a>
<img width="100%" src="https://raw.githubusercontent.com/ultralytics/assets/refs/heads/main/yolo/performance-comparison.png" alt="YOLO11 performance plots"></a>
<div align="center">
<a href="https://github.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-github.png" width="2%" alt="Ultralytics GitHub"></a>

@ -3,7 +3,7 @@
# Image is CUDA-optimized for YOLO11 single/multi-GPU training and inference
# Start FROM PyTorch image https://hub.docker.com/r/pytorch/pytorch or nvcr.io/nvidia/pytorch:23.03-py3
FROM pytorch/pytorch:2.4.1-cuda12.1-cudnn9-runtime
FROM pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
# Set environment variables
# Avoid DDP error "MKL_THREADING_LAYER=INTEL is incompatible with libgomp.so.1 library" https://github.com/pytorch/pytorch/issues/37377
@ -41,8 +41,8 @@ ADD https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt .
# Install pip packages
RUN python3 -m pip install --upgrade pip wheel
# Pin TensorRT-cu12==10.1.0 to avoid 10.2.0 bug https://github.com/ultralytics/ultralytics/pull/14239 (note -cu12 must be used)
RUN pip install -e ".[export]" "tensorrt-cu12==10.1.0" "albumentations>=1.4.6" comet pycocotools
# Note -cu12 must be used with tensorrt
RUN pip install -e ".[export]" tensorrt-cu12 "albumentations>=1.4.6" comet pycocotools
# Run exports to AutoInstall packages
# Edge TPU export fails the first time so is run twice here

@ -29,6 +29,35 @@ keywords: Open Images V7, Google dataset, computer vision, YOLO11 models, object
| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 |
| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 |
You can use these pretrained models for inference or fine-tuning as follows.
!!! example "Pretrained Model Usage Example"
=== "Python"
```python
from ultralytics import YOLO
# Load an Open Images Dataset V7 pretrained YOLOv8n model
model = YOLO("yolov8n-oiv7.pt")
# Run prediction
results = model.predict(source="image.jpg")
# Start training from the pretrained checkpoint
results = model.train(data="coco8.yaml", epochs=100, imgsz=640)
```
=== "CLI"
```bash
# Predict using an Open Images Dataset V7 pretrained model
yolo detect predict source=image.jpg model=yolov8n-oiv7.pt
# Start training from an Open Images Dataset V7 pretrained checkpoint
yolo detect train data=coco8.yaml model=yolov8n-oiv7.pt epochs=100 imgsz=640
```
![Open Images V7 classes visual](https://github.com/ultralytics/docs/releases/download/0/open-images-v7-classes-visual.avif)
## Key Features

@ -19,7 +19,11 @@ Ultralytics provides support for various datasets to facilitate computer vision
<strong>Watch:</strong> Ultralytics Datasets Overview
</p>
## Ultralytics Explorer 🚀 NEW
## Ultralytics Explorer
!!! warning "Community Note ⚠"
As of **`ultralytics>=8.3.10`**, Ultralytics Explorer support has been deprecated. But don't worry! You can now access similar and even enhanced functionality through [Ultralytics HUB](https://hub.ultralytics.com/), our intuitive no-code platform designed to streamline your workflow. With Ultralytics HUB, you can continue exploring, visualizing, and managing your data effortlessly, all without writing a single line of code. Make sure to check it out and take advantage of its powerful features! 🚀
Create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your dataset, search for similar images, run SQL queries, perform semantic search and even search using natural language! You can get started with our GUI app or build your own using the API. Learn more [here](explorer/index.md).
@ -46,7 +50,7 @@ Create [embeddings](https://www.ultralytics.com/glossary/embeddings) for your da
- [VisDrone](detect/visdrone.md): A dataset containing object detection and multi-object tracking data from drone-captured imagery with over 10K images and video sequences.
- [VOC](detect/voc.md): The Pascal Visual Object Classes (VOC) dataset for object detection and segmentation with 20 object classes and over 11K images.
- [xView](detect/xview.md): A dataset for object detection in overhead imagery with 60 object categories and over 1 million annotated objects.
- [Roboflow 100](detect/roboflow-100.md): A diverse object detection benchmark with 100 datasets spanning seven imagery domains for comprehensive model evaluation.
- [RF100](detect/roboflow-100.md): A diverse object detection benchmark with 100 datasets spanning seven imagery domains for comprehensive model evaluation.
- [Brain-tumor](detect/brain-tumor.md): A dataset for detecting brain tumors includes MRI or CT scan images with details on tumor presence, location, and characteristics.
- [African-wildlife](detect/african-wildlife.md): A dataset featuring images of African wildlife, including buffalo, elephant, rhino, and zebras.
- [Signature](detect/signature.md): A dataset featuring images of various documents with annotated signatures, supporting document verification and fraud detection research.

@ -1,12 +1,12 @@
---
comments: true
description: Learn to deploy Ultralytics YOLOv8 on NVIDIA Jetson devices with our detailed guide. Explore performance benchmarks and maximize AI capabilities.
keywords: Ultralytics, YOLOv8, NVIDIA Jetson, JetPack, AI deployment, performance benchmarks, embedded systems, deep learning, TensorRT, computer vision
description: Learn to deploy Ultralytics YOLO11 on NVIDIA Jetson devices with our detailed guide. Explore performance benchmarks and maximize AI capabilities.
keywords: Ultralytics, YOLO11, NVIDIA Jetson, JetPack, AI deployment, performance benchmarks, embedded systems, deep learning, TensorRT, computer vision
---
# Quick Start Guide: NVIDIA Jetson with Ultralytics YOLOv8
# Quick Start Guide: NVIDIA Jetson with Ultralytics YOLO11
This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLOv8 on [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) devices. Additionally, it showcases performance benchmarks to demonstrate the capabilities of YOLOv8 on these small and powerful devices.
This comprehensive guide provides a detailed walkthrough for deploying Ultralytics YOLO11 on [NVIDIA Jetson](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/) devices. Additionally, it showcases performance benchmarks to demonstrate the capabilities of YOLO11 on these small and powerful devices.
<p align="center">
<br>
@ -16,7 +16,7 @@ This comprehensive guide provides a detailed walkthrough for deploying Ultralyti
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> How to Setup NVIDIA Jetson with Ultralytics YOLOv8
<strong>Watch:</strong> How to Setup NVIDIA Jetson with Ultralytics YOLO11
</p>
<img width="1024" src="https://github.com/ultralytics/docs/releases/download/0/nvidia-jetson-ecosystem.avif" alt="NVIDIA Jetson Ecosystem">
@ -77,7 +77,7 @@ The below table highlights NVIDIA JetPack versions supported by different NVIDIA
## Quick Start with Docker
The fastest way to get started with Ultralytics YOLOv8 on NVIDIA Jetson is to run with pre-built docker images for Jetson. Refer to the table above and choose the JetPack version according to the Jetson device you own.
The fastest way to get started with Ultralytics YOLO11 on NVIDIA Jetson is to run with pre-built docker images for Jetson. Refer to the table above and choose the JetPack version according to the Jetson device you own.
=== "JetPack 4"
@ -240,9 +240,9 @@ pip install onnxruntime_gpu-1.17.0-cp38-cp38-linux_aarch64.whl
Out of all the model export formats supported by Ultralytics, TensorRT delivers the best inference performance when working with NVIDIA Jetson devices and our recommendation is to use TensorRT with Jetson. We also have a detailed document on TensorRT [here](../integrations/tensorrt.md).
## Convert Model to TensorRT and Run Inference
### Convert Model to TensorRT and Run Inference
The YOLOv8n model in PyTorch format is converted to TensorRT to run inference with the exported model.
The YOLO11n model in PyTorch format is converted to TensorRT to run inference with the exported model.
!!! example
@ -251,14 +251,14 @@ The YOLOv8n model in PyTorch format is converted to TensorRT to run inference wi
```python
from ultralytics import YOLO
# Load a YOLOv8n PyTorch model
model = YOLO("yolov8n.pt")
# Load a YOLO11n PyTorch model
model = YOLO("yolo11n.pt")
# Export the model
model.export(format="engine") # creates 'yolov8n.engine'
# Export the model to TensorRT
model.export(format="engine") # creates 'yolo11n.engine'
# Load the exported TensorRT model
trt_model = YOLO("yolov8n.engine")
trt_model = YOLO("yolo11n.engine")
# Run inference
results = trt_model("https://ultralytics.com/images/bus.jpg")
@ -267,119 +267,160 @@ The YOLOv8n model in PyTorch format is converted to TensorRT to run inference wi
=== "CLI"
```bash
# Export a YOLOv8n PyTorch model to TensorRT format
yolo export model=yolov8n.pt format=engine # creates 'yolov8n.engine'
# Export a YOLO11n PyTorch model to TensorRT format
yolo export model=yolo11n.pt format=engine # creates 'yolo11n.engine'
# Run inference with the exported model
yolo predict model=yolov8n.engine source='https://ultralytics.com/images/bus.jpg'
yolo predict model=yolo11n.engine source='https://ultralytics.com/images/bus.jpg'
```
### Use NVIDIA Deep Learning Accelerator (DLA)
[NVIDIA Deep Learning Accelerator (DLA)](https://developer.nvidia.com/deep-learning-accelerator) is a specialized hardware component built into NVIDIA Jetson devices that optimizes deep learning inference for energy efficiency and performance. By offloading tasks from the GPU (freeing it up for more intensive processes), DLA enables models to run with lower power consumption while maintaining high throughput, ideal for embedded systems and real-time AI applications.
The following Jetson devices are equipped with DLA hardware:
- Jetson Orin NX 16GB
- Jetson AGX Orin Series
- Jetson AGX Xavier Series
- Jetson Xavier NX Series
!!! example
=== "Python"
```python
from ultralytics import YOLO
# Load a YOLO11n PyTorch model
model = YOLO("yolo11n.pt")
# Export the model to TensorRT with DLA enabled (only works with FP16 or INT8)
model.export(format="engine", device="dla:0", half=True) # dla:0 or dla:1 corresponds to the DLA cores
# Load the exported TensorRT model
trt_model = YOLO("yolo11n.engine")
# Run inference
results = trt_model("https://ultralytics.com/images/bus.jpg")
```
=== "CLI"
```bash
# Export a YOLO11n PyTorch model to TensorRT format with DLA enabled (only works with FP16 or INT8)
yolo export model=yolo11n.pt format=engine device="dla:0" half=True # dla:0 or dla:1 corresponds to the DLA cores
# Run inference with the exported model on the DLA
yolo predict model=yolo11n.engine source='https://ultralytics.com/images/bus.jpg'
```
!!! note
Visit the [Export page](../modes/export.md#arguments) to access additional arguments when exporting models to different model formats
## NVIDIA Jetson Orin YOLOv8 Benchmarks
## NVIDIA Jetson Orin YOLO11 Benchmarks
YOLOv8 benchmarks were run by the Ultralytics team on 10 different model formats measuring speed and [accuracy](https://www.ultralytics.com/glossary/accuracy): PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN. Benchmarks were run on Seeed Studio reComputer J4012 powered by Jetson Orin NX 16GB device at FP32 [precision](https://www.ultralytics.com/glossary/precision) with default input image size of 640.
YOLO11 benchmarks were run by the Ultralytics team on 10 different model formats measuring speed and [accuracy](https://www.ultralytics.com/glossary/accuracy): PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN. Benchmarks were run on Seeed Studio reComputer J4012 powered by Jetson Orin NX 16GB device at FP32 [precision](https://www.ultralytics.com/glossary/precision) with default input image size of 640.
### Comparison Chart
Even though all model exports are working with NVIDIA Jetson, we have only included **PyTorch, TorchScript, TensorRT** for the comparison chart below because they make use of the GPU on the Jetson and are guaranteed to produce the best results. All the other exports only utilize the CPU and the performance is not as good as the above three. You can find benchmarks for all exports in the section after this chart.
<div style="text-align: center;">
<img width="800" src="https://github.com/ultralytics/docs/releases/download/0/nvidia-jetson-ecosystem-2.avif" alt="NVIDIA Jetson Ecosystem">
<img src="https://github.com/ultralytics/docs/releases/download/0/nvidia-jetson-benchmarks.avif" alt="NVIDIA Jetson Ecosystem">
</div>
### Detailed Comparison Table
The below table represents the benchmark results for five different models (YOLOv8n, YOLOv8s, YOLOv8m, YOLOv8l, YOLOv8x) across ten different formats (PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN), giving us the status, size, mAP50-95(B) metric, and inference time for each combination.
The below table represents the benchmark results for five different models (YOLO11n, YOLO11s, YOLO11m, YOLO11l, YOLO11x) across ten different formats (PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, TF SavedModel, TF GraphDef, TF Lite, PaddlePaddle, NCNN), giving us the status, size, mAP50-95(B) metric, and inference time for each combination.
!!! performance
=== "YOLOv8n"
=== "YOLO11n"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 6.2 | 0.6381 | 14.3 |
| TorchScript | ✅ | 12.4 | 0.6117 | 13.3 |
| ONNX | ✅ | 12.2 | 0.6092 | 70.6 |
| OpenVINO | ✅ | 12.3 | 0.6092 | 104.2 |
| TensorRT (FP32) | ✅ | 16.1 | 0.6091 | 8.01 |
| TensorRT (FP16) | ✅ | 9.2 | 0.6093 | 4.55 |
| TensorRT (INT8) | ✅ | 5.9 | 0.2759 | 4.09 |
| TF SavedModel | ✅ | 30.6 | 0.6092 | 141.74 |
| TF GraphDef | ✅ | 12.3 | 0.6092 | 199.93 |
| TF Lite | ✅ | 12.3 | 0.6092 | 349.18 |
| PaddlePaddle | ✅ | 24.4 | 0.6030 | 555 |
| NCNN | ✅ | 12.2 | 0.6092 | 32 |
=== "YOLOv8s"
| PyTorch | ✅ | 5.4 | 0.6176 | 19.80 |
| TorchScript | ✅ | 10.5 | 0.6100 | 13.30 |
| ONNX | ✅ | 10.2 | 0.6082 | 67.92 |
| OpenVINO | ✅ | 10.4 | 0.6082 | 118.21 |
| TensorRT (FP32) | ✅ | 14.1 | 0.6100 | 7.94 |
| TensorRT (FP16) | ✅ | 8.3 | 0.6082 | 4.80 |
| TensorRT (INT8) | ✅ | 6.6 | 0.3256 | 4.17 |
| TF SavedModel | ✅ | 25.8 | 0.6082 | 185.88 |
| TF GraphDef | ✅ | 10.3 | 0.6082 | 256.66 |
| TF Lite | ✅ | 10.3 | 0.6082 | 284.64 |
| PaddlePaddle | ✅ | 20.4 | 0.6082 | 477.41 |
| NCNN | ✅ | 10.2 | 0.6106 | 32.18 |
=== "YOLO11s"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 21.5 | 0.6967 | 18 |
| TorchScript | ✅ | 43.0 | 0.7136 | 23.81 |
| ONNX | ✅ | 42.8 | 0.7136 | 185.55 |
| OpenVINO | ✅ | 42.9 | 0.7136 | 243.97 |
| TensorRT (FP32) | ✅ | 46.4 | 0.7136 | 14.01 |
| TensorRT (FP16) | ✅ | 24.2 | 0.722 | 7.16 |
| TensorRT (INT8) | ✅ | 13.7 | 0.4233 | 5.49 |
| TF SavedModel | ✅ | 107 | 0.7136 | 260.03 |
| TF GraphDef | ✅ | 42.8 | 0.7136 | 423.4 |
| TF Lite | ✅ | 42.8 | 0.7136 | 1046.64 |
| PaddlePaddle | ✅ | 85.5 | 0.7140 | 1464 |
| NCNN | ✅ | 42.7 | 0.7200 | 63 |
=== "YOLOv8m"
| PyTorch | ✅ | 18.4 | 0.7526 | 20.20 |
| TorchScript | ✅ | 36.5 | 0.7416 | 23.42 |
| ONNX | ✅ | 36.3 | 0.7416 | 162.01 |
| OpenVINO | ✅ | 36.4 | 0.7416 | 159.61 |
| TensorRT (FP32) | ✅ | 40.3 | 0.7416 | 13.93 |
| TensorRT (FP16) | ✅ | 21.7 | 0.7416 | 7.47 |
| TensorRT (INT8) | ✅ | 13.6 | 0.3179 | 5.66 |
| TF SavedModel | ✅ | 91.1 | 0.7416 | 316.46 |
| TF GraphDef | ✅ | 36.4 | 0.7416 | 506.71 |
| TF Lite | ✅ | 36.4 | 0.7416 | 842.97 |
| PaddlePaddle | ✅ | 72.5 | 0.7416 | 1172.57 |
| NCNN | ✅ | 36.2 | 0.7419 | 66.00 |
=== "YOLO11m"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 49.7 | 0.7370 | 36.4 |
| TorchScript | ✅ | 99.2 | 0.7285 | 53.58 |
| ONNX | ✅ | 99 | 0.7280 | 452.09 |
| OpenVINO | ✅ | 99.1 | 0.7280 | 544.36 |
| TensorRT (FP32) | ✅ | 102.4 | 0.7285 | 31.51 |
| TensorRT (FP16) | ✅ | 52.6 | 0.7324 | 14.88 |
| TensorRT (INT8) | ✅ | 28.6 | 0.3283 | 10.89 |
| TF SavedModel | ✅ | 247.5 | 0.7280 | 543.65 |
| TF GraphDef | ✅ | 99 | 0.7280 | 906.63 |
| TF Lite | ✅ | 99 | 0.7280 | 2758.08 |
| PaddlePaddle | ✅ | 197.9 | 0.7280 | 3678 |
| NCNN | ✅ | 98.9 | 0.7260 | 135 |
=== "YOLOv8l"
| PyTorch | ✅ | 38.8 | 0.7595 | 36.70 |
| TorchScript | ✅ | 77.3 | 0.7643 | 50.95 |
| ONNX | ✅ | 76.9 | 0.7643 | 416.34 |
| OpenVINO | ✅ | 77.1 | 0.7643 | 370.99 |
| TensorRT (FP32) | ✅ | 81.5 | 0.7640 | 30.49 |
| TensorRT (FP16) | ✅ | 42.2 | 0.7658 | 14.93 |
| TensorRT (INT8) | ✅ | 24.3 | 0.4118 | 10.32 |
| TF SavedModel | ✅ | 192.7 | 0.7643 | 597.08 |
| TF GraphDef | ✅ | 77.0 | 0.7643 | 1016.12 |
| TF Lite | ✅ | 77.0 | 0.7643 | 2494.60 |
| PaddlePaddle | ✅ | 153.8 | 0.7643 | 3218.99 |
| NCNN | ✅ | 76.8 | 0.7691 | 192.77 |
=== "YOLO11l"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 83.7 | 0.7768 | 61.3 |
| TorchScript | ✅ | 167.2 | 0.7554 | 87.9 |
| ONNX | ✅ | 166.8 | 0.7551 | 852.29 |
| OpenVINO | ✅ | 167 | 0.7551 | 1012.6 |
| TensorRT (FP32) | ✅ | 170.5 | 0.7554 | 49.79 |
| TensorRT (FP16) | ✅ | 86.1 | 0.7535 | 22.89 |
| TensorRT (INT8) | ✅ | 46.4 | 0.4048 | 14.61 |
| TF SavedModel | ✅ | 417.2 | 0.7551 | 990.45 |
| TF GraphDef | ✅ | 166.9 | 0.7551 | 1649.86 |
| TF Lite | ✅ | 166.9 | 0.7551 | 5652.37 |
| PaddlePaddle | ✅ | 333.6 | 0.7551 | 7114.67 |
| NCNN | ✅ | 166.8 | 0.7685 | 231.9 |
=== "YOLOv8x"
| PyTorch | ✅ | 49.0 | 0.7475 | 47.6 |
| TorchScript | ✅ | 97.6 | 0.7250 | 66.36 |
| ONNX | ✅ | 97.0 | 0.7250 | 532.58 |
| OpenVINO | ✅ | 97.3 | 0.7250 | 477.55 |
| TensorRT (FP32) | ✅ | 101.6 | 0.7250 | 38.71 |
| TensorRT (FP16) | ✅ | 52.6 | 0.7265 | 19.35 |
| TensorRT (INT8) | ✅ | 31.6 | 0.3856 | 13.50 |
| TF SavedModel | ✅ | 243.3 | 0.7250 | 895.24 |
| TF GraphDef | ✅ | 97.2 | 0.7250 | 1301.19 |
| TF Lite | ✅ | 97.2 | 0.7250 | 3202.93 |
| PaddlePaddle | ✅ | 193.9 | 0.7250 | 4206.98 |
| NCNN | ✅ | 96.9 | 0.7252 | 225.75 |
=== "YOLO11x"
| Format | Status | Size on disk (MB) | mAP50-95(B) | Inference time (ms/im) |
|-----------------|--------|-------------------|-------------|------------------------|
| PyTorch | ✅ | 130.5 | 0.7759 | 93 |
| TorchScript | ✅ | 260.7 | 0.7472 | 135.1 |
| ONNX | ✅ | 260.4 | 0.7479 | 1296.13 |
| OpenVINO | ✅ | 260.6 | 0.7479 | 1502.15 |
| TensorRT (FP32) | ✅ | 264.0 | 0.7469 | 80.01 |
| TensorRT (FP16) | ✅ | 133.3 | 0.7513 | 40.76 |
| TensorRT (INT8) | ✅ | 70.2 | 0.4277 | 22.08 |
| TF SavedModel | ✅ | 651.1 | 0.7479 | 1451.76 |
| TF GraphDef | ✅ | 260.5 | 0.7479 | 4029.36 |
| TF Lite | ✅ | 260.4 | 0.7479 | 8772.86 |
| PaddlePaddle | ✅ | 520.8 | 0.7479 | 10619.53 |
| NCNN | ✅ | 260.4 | 0.7646 | 376.38 |
| PyTorch | ✅ | 109.3 | 0.8288 | 85.60 |
| TorchScript | ✅ | 218.1 | 0.8308 | 121.67 |
| ONNX | ✅ | 217.5 | 0.8308 | 1073.14 |
| OpenVINO | ✅ | 217.8 | 0.8308 | 955.60 |
| TensorRT (FP32) | ✅ | 221.6 | 0.8307 | 75.84 |
| TensorRT (FP16) | ✅ | 113.1 | 0.8295 | 35.75 |
| TensorRT (INT8) | ✅ | 62.2 | 0.4783 | 22.23 |
| TF SavedModel | ✅ | 545.0 | 0.8308 | 1497.40 |
| TF GraphDef | ✅ | 217.8 | 0.8308 | 2552.42 |
| TF Lite | ✅ | 217.8 | 0.8308 | 7044.58 |
| PaddlePaddle | ✅ | 434.9 | 0.8308 | 8386.73 |
| NCNN | ✅ | 217.3 | 0.8304 | 486.36 |
[Explore more benchmarking efforts by Seeed Studio](https://www.seeedstudio.com/blog/2023/03/30/yolov8-performance-benchmarks-on-nvidia-jetson-devices) running on different versions of NVIDIA Jetson hardware.
@ -394,25 +435,25 @@ To reproduce the above Ultralytics benchmarks on all export [formats](../modes/e
```python
from ultralytics import YOLO
# Load a YOLOv8n PyTorch model
model = YOLO("yolov8n.pt")
# Load a YOLO11n PyTorch model
model = YOLO("yolo11n.pt")
# Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all all export formats
# Benchmark YOLO11n speed and accuracy on the COCO8 dataset for all export formats
results = model.benchmarks(data="coco8.yaml", imgsz=640)
```
=== "CLI"
```bash
# Benchmark YOLOv8n speed and accuracy on the COCO8 dataset for all all export formats
yolo benchmark model=yolov8n.pt data=coco8.yaml imgsz=640
# Benchmark YOLO11n speed and accuracy on the COCO8 dataset for all export formats
yolo benchmark model=yolo11n.pt data=coco8.yaml imgsz=640
```
Note that benchmarking results might vary based on the exact hardware and software configuration of a system, as well as the current workload of the system at the time the benchmarks are run. For the most reliable results use a dataset with a large number of images, i.e. `data='coco8.yaml'` (4 val images), or `data='coco.yaml'` (5000 val images).
## Best Practices when using NVIDIA Jetson
When using NVIDIA Jetson, there are a couple of best practices to follow in order to enable maximum performance on the NVIDIA Jetson running YOLOv8.
When using NVIDIA Jetson, there are a couple of best practices to follow in order to enable maximum performance on the NVIDIA Jetson running YOLO11.
1. Enable MAX Power Mode
@ -445,29 +486,29 @@ When using NVIDIA Jetson, there are a couple of best practices to follow in orde
## Next Steps
Congratulations on successfully setting up YOLOv8 on your NVIDIA Jetson! For further learning and support, visit more guide at [Ultralytics YOLOv8 Docs](../index.md)!
Congratulations on successfully setting up YOLO11 on your NVIDIA Jetson! For further learning and support, visit more guides at [Ultralytics YOLO11 Docs](../index.md)!
## FAQ
### How do I deploy Ultralytics YOLOv8 on NVIDIA Jetson devices?
### How do I deploy Ultralytics YOLO11 on NVIDIA Jetson devices?
Deploying Ultralytics YOLOv8 on NVIDIA Jetson devices is a straightforward process. First, flash your Jetson device with the NVIDIA JetPack SDK. Then, either use a pre-built Docker image for quick setup or manually install the required packages. Detailed steps for each approach can be found in sections [Quick Start with Docker](#quick-start-with-docker) and [Start with Native Installation](#start-with-native-installation).
Deploying Ultralytics YOLO11 on NVIDIA Jetson devices is a straightforward process. First, flash your Jetson device with the NVIDIA JetPack SDK. Then, either use a pre-built Docker image for quick setup or manually install the required packages. Detailed steps for each approach can be found in sections [Quick Start with Docker](#quick-start-with-docker) and [Start with Native Installation](#start-with-native-installation).
### What performance benchmarks can I expect from YOLOv8 models on NVIDIA Jetson devices?
### What performance benchmarks can I expect from YOLO11 models on NVIDIA Jetson devices?
YOLOv8 models have been benchmarked on various NVIDIA Jetson devices showing significant performance improvements. For example, the TensorRT format delivers the best inference performance. The table in the [Detailed Comparison Table](#detailed-comparison-table) section provides a comprehensive view of performance metrics like mAP50-95 and inference time across different model formats.
YOLO11 models have been benchmarked on various NVIDIA Jetson devices showing significant performance improvements. For example, the TensorRT format delivers the best inference performance. The table in the [Detailed Comparison Table](#detailed-comparison-table) section provides a comprehensive view of performance metrics like mAP50-95 and inference time across different model formats.
### Why should I use TensorRT for deploying YOLOv8 on NVIDIA Jetson?
### Why should I use TensorRT for deploying YOLO11 on NVIDIA Jetson?
TensorRT is highly recommended for deploying YOLOv8 models on NVIDIA Jetson due to its optimal performance. It accelerates inference by leveraging the Jetson's GPU capabilities, ensuring maximum efficiency and speed. Learn more about how to convert to TensorRT and run inference in the [Use TensorRT on NVIDIA Jetson](#use-tensorrt-on-nvidia-jetson) section.
TensorRT is highly recommended for deploying YOLO11 models on NVIDIA Jetson due to its optimal performance. It accelerates inference by leveraging the Jetson's GPU capabilities, ensuring maximum efficiency and speed. Learn more about how to convert to TensorRT and run inference in the [Use TensorRT on NVIDIA Jetson](#use-tensorrt-on-nvidia-jetson) section.
### How can I install PyTorch and Torchvision on NVIDIA Jetson?
To install PyTorch and Torchvision on NVIDIA Jetson, first uninstall any existing versions that may have been installed via pip. Then, manually install the compatible PyTorch and Torchvision versions for the Jetson's ARM64 architecture. Detailed instructions for this process are provided in the [Install PyTorch and Torchvision](#install-pytorch-and-torchvision) section.
### What are the best practices for maximizing performance on NVIDIA Jetson when using YOLOv8?
### What are the best practices for maximizing performance on NVIDIA Jetson when using YOLO11?
To maximize performance on NVIDIA Jetson with YOLOv8, follow these best practices:
To maximize performance on NVIDIA Jetson with YOLO11, follow these best practices:
1. Enable MAX Power Mode to utilize all CPU and GPU cores.
2. Enable Jetson Clocks to run all cores at their maximum frequency.

@ -12,4 +12,5 @@
| `workspace` | `float` | `4.0` | Sets the maximum workspace size in GiB for TensorRT optimizations, balancing memory usage and performance. |
| `nms` | `bool` | `False` | Adds Non-Maximum Suppression (NMS) to the CoreML and MCT export, essential for accurate and efficient detection post-processing. |
| `batch` | `int` | `1` | Specifies export model batch inference size or the max number of images the exported model will process concurrently in `predict` mode. |
| `device` | `str` | `None` | Specifies the device for exporting: GPU (`device=0`), CPU (`device=cpu`), MPS for Apple silicon (`device=mps`) or DLA for NVIDIA Jetson (`device=dla:0` or `device=dla:1`). |
| `gptq` | `bool` | `False` | Enables GPTQ quantization for Sony MCT export. |

@ -15,3 +15,5 @@
| `classes` | `list[int]` | `None` | Filters predictions to a set of class IDs. Only detections belonging to the specified classes will be returned. Useful for focusing on relevant objects in multi-class detection tasks. |
| `retina_masks` | `bool` | `False` | Uses high-resolution segmentation masks if available in the model. This can enhance mask quality for segmentation tasks, providing finer detail. |
| `embed` | `list[int]` | `None` | Specifies the layers from which to extract feature vectors or [embeddings](https://www.ultralytics.com/glossary/embeddings). Useful for downstream tasks like clustering or similarity search. |
| `project` | `str` | `None` | Name of the project directory where prediction outputs are saved if `save` is enabled. |
| `name` | `str` | `None` | Name of the prediction run. Used for creating a subdirectory within the project folder, where prediction outputs are stored if `save` is enabled. |

@ -14,3 +14,5 @@
| `plots` | `bool` | `False` | When set to `True`, generates and saves plots of predictions versus ground truth for visual evaluation of the model's performance. |
| `rect` | `bool` | `False` | If `True`, uses rectangular inference for batching, reducing padding and potentially increasing speed and efficiency. |
| `split` | `str` | `val` | Determines the dataset split to use for validation (`val`, `test`, or `train`). Allows flexibility in choosing the data segment for performance evaluation. |
| `project` | `str` | `None` | Name of the project directory where validation outputs are saved. |
| `name` | `str` | `None` | Name of the validation run. Used for creating a subdirectory within the project folder, where validation logs and outputs are stored. |

@ -8,7 +8,7 @@ keywords: Ultralytics, supported models, YOLOv3, YOLOv4, YOLOv5, YOLOv6, YOLOv7,
Welcome to Ultralytics' model documentation! We offer support for a wide range of models, each tailored to specific tasks like [object detection](../tasks/detect.md), [instance segmentation](../tasks/segment.md), [image classification](../tasks/classify.md), [pose estimation](../tasks/pose.md), and [multi-object tracking](../modes/track.md). If you're interested in contributing your model architecture to Ultralytics, check out our [Contributing Guide](../help/contributing.md).
![Ultralytics YOLO11 Comparison Plots](https://github.com/user-attachments/assets/a311a4ed-bbf2-43b5-8012-5f183a28a845)
![Ultralytics YOLO11 Comparison Plots](https://raw.githubusercontent.com/ultralytics/assets/refs/heads/main/yolo/performance-comparison.png)
## Featured Models

@ -1,9 +1,13 @@
---
comments: true
description: Discover SAM 2, the next generation of Meta's Segment Anything Model, supporting real-time promptable segmentation in both images and videos with state-of-the-art performance. Learn about its key features, datasets, and how to use it.
keywords: SAM 2, Segment Anything, video segmentation, image segmentation, promptable segmentation, zero-shot performance, SA-V dataset, Ultralytics, real-time segmentation, AI, machine learning
keywords: SAM 2, SAM 2.1, Segment Anything, video segmentation, image segmentation, promptable segmentation, zero-shot performance, SA-V dataset, Ultralytics, real-time segmentation, AI, machine learning
---
!!! tip "SAM 2.1"
We have just added support for the more accurate SAM 2.1 model. Please give it a try!
# SAM 2: Segment Anything Model 2
SAM 2, the successor to Meta's [Segment Anything Model (SAM)](sam.md), is a cutting-edge tool designed for comprehensive object segmentation in both images and videos. It excels in handling complex visual data through a unified, promptable model architecture that supports real-time processing and zero-shot generalization.
@ -114,12 +118,16 @@ pip install ultralytics
The following table details the available SAM 2 models, their pre-trained weights, supported tasks, and compatibility with different operating modes like [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md).
| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export |
| ----------- | ------------------------------------------------------------------------------------- | -------------------------------------------- | --------- | ---------- | -------- | ------ |
| SAM 2 tiny | [sam2_t.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/sam2_t.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ |
| SAM 2 small | [sam2_s.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/sam2_s.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ |
| SAM 2 base | [sam2_b.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/sam2_b.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ |
| SAM 2 large | [sam2_l.pt](https://github.com/ultralytics/assets/releases/download/v8.2.0/sam2_l.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ |
| Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export |
| ------------- | ----------------------------------------------------------------------------------------- | -------------------------------------------- | --------- | ---------- | -------- | ------ |
| SAM 2 tiny | [sam2_t.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2_t.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ |
| SAM 2 small | [sam2_s.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2_s.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ |
| SAM 2 base | [sam2_b.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2_b.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ |
| SAM 2 large | [sam2_l.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2_l.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ |
| SAM 2.1 tiny | [sam2.1_t.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_t.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ |
| SAM 2.1 small | [sam2.1_s.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_s.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ |
| SAM 2.1 base | [sam2.1_b.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_b.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ |
| SAM 2.1 large | [sam2.1_l.pt](https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_l.pt) | [Instance Segmentation](../tasks/segment.md) | ✅ | ❌ | ❌ | ❌ |
### SAM 2 Prediction Examples
@ -137,7 +145,7 @@ SAM 2 can be utilized across a broad spectrum of tasks, including real-time vide
from ultralytics import SAM
# Load a model
model = SAM("sam2_b.pt")
model = SAM("sam2.1_b.pt")
# Display model information (optional)
model.info()
@ -170,7 +178,7 @@ SAM 2 can be utilized across a broad spectrum of tasks, including real-time vide
from ultralytics import SAM
# Load a model
model = SAM("sam2_b.pt")
model = SAM("sam2.1_b.pt")
# Display model information (optional)
model.info()
@ -183,7 +191,7 @@ SAM 2 can be utilized across a broad spectrum of tasks, including real-time vide
```bash
# Run inference with a SAM 2 model
yolo predict model=sam2_b.pt source=path/to/video.mp4
yolo predict model=sam2.1_b.pt source=path/to/video.mp4
```
- This example demonstrates how SAM 2 can be used to segment the entire content of an image or video if no prompts (bboxes/points/masks) are provided.

@ -8,9 +8,13 @@ keywords: YOLO11, state-of-the-art object detection, YOLO series, Ultralytics, c
## Overview
!!! tip "Ultralytics YOLO11 Publication"
Ultralytics has not published a formal research paper for YOLO11 due to the rapidly evolving nature of the models. We focus on advancing the technology and making it easier to use, rather than producing static documentation. For the most up-to-date information on YOLO architecture, features, and usage, please refer to our [GitHub repository](https://github.com/ultralytics/ultralytics) and [documentation](https://docs.ultralytics.com).
YOLO11 is the latest iteration in the [Ultralytics](https://www.ultralytics.com/) YOLO series of real-time object detectors, redefining what's possible with cutting-edge [accuracy](https://www.ultralytics.com/glossary/accuracy), speed, and efficiency. Building upon the impressive advancements of previous YOLO versions, YOLO11 introduces significant improvements in architecture and training methods, making it a versatile choice for a wide range of [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) tasks.
![Ultralytics YOLO11 Comparison Plots](https://github.com/user-attachments/assets/a311a4ed-bbf2-43b5-8012-5f183a28a845)
![Ultralytics YOLO11 Comparison Plots](https://raw.githubusercontent.com/ultralytics/assets/refs/heads/main/yolo/performance-comparison.png)
<p align="center">
<br>

@ -4,7 +4,11 @@ description: Explore YOLOv5u, an advanced object detection model with optimized
keywords: YOLOv5, YOLOv5u, object detection, Ultralytics, anchor-free, pre-trained models, accuracy, speed, real-time detection
---
# YOLOv5
# Ultralytics YOLOv5
!!! tip "Ultralytics YOLOv5 Publication"
Ultralytics has not published a formal research paper for YOLOv5 due to the rapidly evolving nature of the models. We focus on advancing the technology and making it easier to use, rather than producing static documentation. For the most up-to-date information on YOLO architecture, features, and usage, please refer to our [GitHub repository](https://github.com/ultralytics/ultralytics) and [documentation](https://docs.ultralytics.com).
## Overview

@ -6,6 +6,10 @@ keywords: YOLOv8, real-time object detection, YOLO series, Ultralytics, computer
# Ultralytics YOLOv8
!!! tip "Ultralytics YOLOv8 Publication"
Ultralytics has not published a formal research paper for YOLOv8 due to the rapidly evolving nature of the models. We focus on advancing the technology and making it easier to use, rather than producing static documentation. For the most up-to-date information on YOLO architecture, features, and usage, please refer to our [GitHub repository](https://github.com/ultralytics/ultralytics) and [documentation](https://docs.ultralytics.com).
## Overview
YOLOv8 is the latest iteration in the YOLO series of real-time object detectors, offering cutting-edge performance in terms of accuracy and speed. Building upon the advancements of previous YOLO versions, YOLOv8 introduces new features and optimizations that make it an ideal choice for various [object detection](https://www.ultralytics.com/glossary/object-detection) tasks in a wide range of applications.

@ -136,13 +136,13 @@ INT8 quantization is an excellent way to compress the model and speed up inferen
from ultralytics import YOLO
model = YOLO("yolo11n.pt") # Load a model
model.export(format="onnx", int8=True)
model.export(format="engine", int8=True)
```
=== "CLI"
```bash
yolo export model=yolo11n.pt format=onnx int8=True # export model with INT8 quantization
yolo export model=yolo11n.pt format=engine int8=True # export TensorRT model with INT8 quantization
```
INT8 quantization can be applied to various formats, such as TensorRT and CoreML. More details can be found in the [Export section](../modes/export.md).

@ -665,7 +665,7 @@ For more details see the [`Probs` class documentation](../reference/engine/resul
model = YOLO("yolo11n-obb.pt")
# Run inference on an image
results = model("bus.jpg") # results list
results = model("boats.jpg") # results list
# View results
for r in results:

@ -141,14 +141,14 @@ Use a trained YOLO11n-obb model to run predictions on images.
model = YOLO("path/to/best.pt") # load a custom model
# Predict with the model
results = model("https://ultralytics.com/images/bus.jpg") # predict on an image
results = model("https://ultralytics.com/images/boats.jpg") # predict on an image
```
=== "CLI"
```bash
yolo obb predict model=yolo11n-obb.pt source='https://ultralytics.com/images/bus.jpg' # predict with official model
yolo obb predict model=path/to/best.pt source='https://ultralytics.com/images/bus.jpg' # predict with custom model
yolo obb predict model=yolo11n-obb.pt source='https://ultralytics.com/images/boats.jpg' # predict with official model
yolo obb predict model=path/to/best.pt source='https://ultralytics.com/images/boats.jpg' # predict with custom model
```
<p align="center">

@ -96,10 +96,7 @@
"source": [
"import cv2\n",
"\n",
"from ultralytics import YOLO, solutions\n",
"\n",
"# Load YOLO model\n",
"model = YOLO(\"yolo11n.pt\")\n",
"from ultralytics import solutions\n",
"\n",
"# Open video file\n",
"cap = cv2.VideoCapture(\"path/to/video/file.mp4\")\n",
@ -113,10 +110,9 @@
"\n",
"# Initialize heatmap object\n",
"heatmap_obj = solutions.Heatmap(\n",
" colormap=cv2.COLORMAP_PARULA,\n",
" view_img=True,\n",
" shape=\"circle\",\n",
" names=model.names,\n",
" colormap=cv2.COLORMAP_PARULA, # Color of the heatmap\n",
" show=True, # Display the image during processing\n",
" model=yolo11n.pt, # Ultralytics YOLO11 model file\n",
")\n",
"\n",
"while cap.isOpened():\n",
@ -125,11 +121,8 @@
" print(\"Video frame is empty or video processing has been successfully completed.\")\n",
" break\n",
"\n",
" # Perform tracking on the current frame\n",
" tracks = model.track(im0, persist=True, show=False)\n",
"\n",
" # Generate heatmap on the frame\n",
" im0 = heatmap_obj.generate_heatmap(im0, tracks)\n",
" im0 = heatmap_obj.generate_heatmap(im0)\n",
"\n",
" # Write the frame to the output video\n",
" video_writer.write(im0)\n",

@ -104,10 +104,7 @@
"source": [
"import cv2\n",
"\n",
"from ultralytics import YOLO, solutions\n",
"\n",
"# Load the pre-trained YOLO11 model\n",
"model = YOLO(\"yolo11n.pt\")\n",
"from ultralytics import solutions\n",
"\n",
"# Open the video file\n",
"cap = cv2.VideoCapture(\"path/to/video/file.mp4\")\n",
@ -119,19 +116,15 @@
"# Define points for a line or region of interest in the video frame\n",
"line_points = [(20, 400), (1080, 400)] # Line coordinates\n",
"\n",
"# Specify classes to count, for example: person (0) and car (2)\n",
"classes_to_count = [0, 2] # Class IDs for person and car\n",
"\n",
"# Initialize the video writer to save the output video\n",
"video_writer = cv2.VideoWriter(\"object_counting_output.avi\", cv2.VideoWriter_fourcc(*\"mp4v\"), fps, (w, h))\n",
"\n",
"# Initialize the Object Counter with visualization options and other parameters\n",
"counter = solutions.ObjectCounter(\n",
" view_img=True, # Display the image during processing\n",
" reg_pts=line_points, # Region of interest points\n",
" names=model.names, # Class names from the YOLO model\n",
" draw_tracks=True, # Draw tracking lines for objects\n",
" line_thickness=2, # Thickness of the lines drawn\n",
" show=True, # Display the image during processing\n",
" region=line_points, # Region of interest points\n",
" model=yolo11n.pt, # Ultralytics YOLO11 model file\n",
" line_width=2, # Thickness of the lines and bounding boxes\n",
")\n",
"\n",
"# Process video frames in a loop\n",
@ -141,11 +134,8 @@
" print(\"Video frame is empty or video processing has been successfully completed.\")\n",
" break\n",
"\n",
" # Perform object tracking on the current frame, filtering by specified classes\n",
" tracks = model.track(im0, persist=True, show=False, classes=classes_to_count)\n",
"\n",
" # Use the Object Counter to count objects in the frame and get the annotated image\n",
" im0 = counter.start_counting(im0, tracks)\n",
" im0 = counter.count(im0)\n",
"\n",
" # Write the annotated frame to the output video\n",
" video_writer.write(im0)\n",

@ -583,7 +583,7 @@
"\n",
"model = YOLO('yolo11n-obb.pt') # load a pretrained YOLO OBB model\n",
"model.train(data='dota8.yaml', epochs=3) # train the model\n",
"model('https://ultralytics.com/images/bus.jpg') # predict on an image"
"model('https://ultralytics.com/images/boats.jpg') # predict on an image"
],
"metadata": {
"id": "IJNKClOOB5YS"

@ -274,7 +274,7 @@ nav:
- VisDrone: datasets/detect/visdrone.md
- VOC: datasets/detect/voc.md
- xView: datasets/detect/xview.md
- Roboflow 100: datasets/detect/roboflow-100.md
- RF100: datasets/detect/roboflow-100.md
- Brain-tumor: datasets/detect/brain-tumor.md
- African-wildlife: datasets/detect/african-wildlife.md
- Signature: datasets/detect/signature.md

@ -61,7 +61,8 @@ classifiers = [
# Required dependencies ------------------------------------------------------------------------------------------------
dependencies = [
"numpy>=1.23.0", # temporary patch for compat errors https://github.com/ultralytics/yolov5/actions/runs/9538130424/job/26286956354
"numpy>=1.23.0",
"numpy<2.0.0; sys_platform == 'darwin'", # macOS OpenVINO errors https://github.com/ultralytics/ultralytics/pull/17221
"matplotlib>=3.3.0",
"opencv-python>=4.6.0",
"pillow>=7.1.2",

@ -116,7 +116,7 @@ def test_predict_sam():
from ultralytics.models.sam import Predictor as SAMPredictor
# Load a model
model = SAM(WEIGHTS_DIR / "sam_b.pt")
model = SAM(WEIGHTS_DIR / "sam2.1_b.pt")
# Display model information (optional)
model.info()

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
__version__ = "8.3.18"
__version__ = "8.3.24"
import os

@ -787,7 +787,7 @@ def entrypoint(debug=""):
from ultralytics import FastSAM
model = FastSAM(model)
elif "sam_" in stem or "sam2_" in stem:
elif "sam_" in stem or "sam2_" in stem or "sam2.1_" in stem:
from ultralytics import SAM
model = SAM(model)
@ -809,7 +809,9 @@ def entrypoint(debug=""):
# Mode
if mode in {"predict", "track"} and "source" not in overrides:
overrides["source"] = DEFAULT_CFG.source or ASSETS
overrides["source"] = (
"https://ultralytics.com/images/boats.jpg" if task == "obb" else DEFAULT_CFG.source or ASSETS
)
LOGGER.warning(f"WARNING ⚠ 'source' argument is missing. Using default 'source={overrides['source']}'.")
elif mode in {"train", "val"}:
if "data" not in overrides and "resume" not in overrides:

@ -1,18 +1,19 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Configuration for Ultralytics Solutions
model: "yolo11n.pt" # The Ultralytics YOLO11 model to be used (e.g., yolo11n.pt for YOLO11 nano version and yolov8n.pt for YOLOv8 nano version)
# Object counting settings
region: # Object counting, queue or speed estimation region points. Default region points are [(20, 400), (1080, 404), (1080, 360), (20, 360)]
line_width: 2 # Width of the annotator used to draw regions on the image/video frames + bounding boxes and tracks drawing. Default value is 2.
show: True # Flag to control whether to display the output image. Set this to False, e.g. when deploying on embedded devices.
show_in: True # Flag to display objects moving *into* the defined region
show_out: True # Flag to display objects moving *out of* the defined region
classes: # Classes to count, e.g. to detect, track and count only persons with a COCO model, use classes=0. Default is None.
# Heatmaps settings
colormap: # Colormap for heatmap, Only OPENCV supported colormaps can be used. By default COLORMAP_PARULA will be used for visualization.
# Workouts monitoring settings
up_angle: 145.0 # Workouts up_angle for counts, 145.0 is default value. You can adjust it for different workouts, based on position of keypoints.
down_angle: 90 # Workouts down_angle for counts, 90 is default value. You can change it for different workouts, based on position of keypoints.
kpts: [6, 8, 10] # Keypoints for workouts monitoring, e.g. pushups mostly use keypoints [6, 8, 10].
colormap: # Colormap for heatmap, Only OPENCV supported colormaps can be used. By default COLORMAP_PARULA will be used for visualization.
# Analytics settings
analytics_type: "line" # Analytics type i.e "line", "pie", "bar" or "area" charts. By default, "line" analytics will be used for processing.
json_file: # parking system regions file path.

@ -632,9 +632,10 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
txt_file = save_dir / lb_name
cls = label["cls"]
for i, s in enumerate(label["segments"]):
if len(s) == 0:
continue
line = (int(cls[i]), *s.reshape(-1))
texts.append(("%g " * len(line)).rstrip() % line)
if texts:
with open(txt_file, "a") as f:
f.writelines(text + "\n" for text in texts)
LOGGER.info(f"Generated segment labels saved in {save_dir}")

@ -213,9 +213,13 @@ class Exporter:
LOGGER.warning("WARNING ⚠ Sony MCT only supports int8 export, setting int8=True.")
self.args.int8 = True
# Device
dla = None
if fmt == "engine" and self.args.device is None:
LOGGER.warning("WARNING ⚠ TensorRT requires GPU export, automatically assigning device=0")
self.args.device = "0"
if fmt == "engine" and "dla" in str(self.args.device): # convert int/list to str first
dla = self.args.device.split(":")[-1]
assert dla in {"0", "1"}, f"Expected self.args.device='dla:0' or 'dla:1, but got {self.args.device}."
self.device = select_device("cpu" if self.args.device is None else self.args.device)
# Checks
if not hasattr(model, "names"):
@ -349,7 +353,7 @@ class Exporter:
if jit or ncnn: # TorchScript
f[0], _ = self.export_torchscript()
if engine: # TensorRT required before ONNX
f[1], _ = self.export_engine()
f[1], _ = self.export_engine(dla=dla)
if onnx: # ONNX
f[2], _ = self.export_onnx()
if xml: # OpenVINO
@ -495,6 +499,7 @@ class Exporter:
@try_export
def export_openvino(self, prefix=colorstr("OpenVINO:")):
"""YOLO OpenVINO export."""
# WARNING: numpy>=2.0.0 issue with OpenVINO on macOS https://github.com/ultralytics/ultralytics/pull/17221
check_requirements(f'openvino{"<=2024.0.0" if ARM64 else ">=2024.0.0"}') # fix OpenVINO issue on ARM64
import openvino as ov
@ -724,7 +729,7 @@ class Exporter:
return f, ct_model
@try_export
def export_engine(self, prefix=colorstr("TensorRT:")):
def export_engine(self, dla=None, prefix=colorstr("TensorRT:")):
"""YOLO TensorRT export https://developer.nvidia.com/tensorrt."""
assert self.im.device.type != "cpu", "export running on CPU but must be on GPU, i.e. use 'device=0'"
f_onnx, _ = self.export_onnx() # run before TRT import https://github.com/ultralytics/ultralytics/issues/7016
@ -733,10 +738,10 @@ class Exporter:
import tensorrt as trt # noqa
except ImportError:
if LINUX:
check_requirements("tensorrt>7.0.0,<=10.1.0")
check_requirements("tensorrt>7.0.0,!=10.1.0")
import tensorrt as trt # noqa
check_version(trt.__version__, ">=7.0.0", hard=True)
check_version(trt.__version__, "<=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239")
check_version(trt.__version__, "!=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239")
# Setup and checks
LOGGER.info(f"\n{prefix} starting export with TensorRT {trt.__version__}...")
@ -759,6 +764,20 @@ class Exporter:
network = builder.create_network(flag)
half = builder.platform_has_fast_fp16 and self.args.half
int8 = builder.platform_has_fast_int8 and self.args.int8
# Optionally switch to DLA if enabled
if dla is not None:
if not IS_JETSON:
raise ValueError("DLA is only available on NVIDIA Jetson devices")
LOGGER.info(f"{prefix} enabling DLA on core {dla}...")
if not self.args.half and not self.args.int8:
raise ValueError(
"DLA requires either 'half=True' (FP16) or 'int8=True' (INT8) to be enabled. Please enable one of them and try again."
)
config.default_device_type = trt.DeviceType.DLA
config.DLA_core = int(dla)
config.set_flag(trt.BuilderFlag.GPU_FALLBACK)
# Read ONNX file
parser = trt.OnnxParser(network, logger)
if not parser.parse_from_file(f_onnx):
@ -913,8 +932,10 @@ class Exporter:
tmp_file = f / "tmp_tflite_int8_calibration_images.npy" # int8 calibration images file
if self.args.data:
f.mkdir()
images = [batch["img"].permute(0, 2, 3, 1) for batch in self.get_int8_calibration_dataloader(prefix)]
images = torch.cat(images, 0).float()
images = [batch["img"] for batch in self.get_int8_calibration_dataloader(prefix)]
images = torch.nn.functional.interpolate(torch.cat(images, 0).float(), size=self.imgsz).permute(
0, 2, 3, 1
)
np.save(str(tmp_file), images.numpy().astype(np.float32)) # BHWC
np_data = [["images", tmp_file, [[[[0, 0, 0]]]], [[[[255, 255, 255]]]]]]

@ -263,6 +263,7 @@ def _build_sam2(
memory_attention = MemoryAttention(d_model=256, pos_enc_at_input=True, num_layers=4, layer=MemoryAttentionLayer())
memory_encoder = MemoryEncoder(out_dim=64)
is_sam2_1 = checkpoint is not None and "sam2.1" in checkpoint
sam2 = SAM2Model(
image_encoder=image_encoder,
memory_attention=memory_attention,
@ -288,6 +289,9 @@ def _build_sam2(
multimask_max_pt_num=1,
use_mlp_for_obj_ptr_proj=True,
compile_image_encoder=False,
no_obj_embed_spatial=is_sam2_1,
proj_tpos_enc_in_obj_ptrs=is_sam2_1,
use_signed_tpos_enc_to_obj_ptrs=is_sam2_1,
sam_mask_decoder_extra_args=dict(
dynamic_multimask_via_stability=True,
dynamic_multimask_stability_delta=0.05,
@ -313,6 +317,10 @@ sam_model_map = {
"sam2_s.pt": build_sam2_s,
"sam2_b.pt": build_sam2_b,
"sam2_l.pt": build_sam2_l,
"sam2.1_t.pt": build_sam2_t,
"sam2.1_s.pt": build_sam2_s,
"sam2.1_b.pt": build_sam2_b,
"sam2.1_l.pt": build_sam2_l,
}

@ -161,18 +161,19 @@ class SAM2Model(torch.nn.Module):
use_multimask_token_for_obj_ptr: bool = False,
iou_prediction_use_sigmoid=False,
memory_temporal_stride_for_eval=1,
add_all_frames_to_correct_as_cond=False,
non_overlap_masks_for_mem_enc=False,
use_obj_ptrs_in_encoder=False,
max_obj_ptrs_in_encoder=16,
add_tpos_enc_to_obj_ptrs=True,
proj_tpos_enc_in_obj_ptrs=False,
use_signed_tpos_enc_to_obj_ptrs=False,
only_obj_ptrs_in_the_past_for_eval=False,
pred_obj_scores: bool = False,
pred_obj_scores_mlp: bool = False,
fixed_no_obj_ptr: bool = False,
soft_no_obj_ptr: bool = False,
use_mlp_for_obj_ptr_proj: bool = False,
no_obj_embed_spatial: bool = False,
sam_mask_decoder_extra_args=None,
compile_image_encoder: bool = False,
):
@ -205,8 +206,6 @@ class SAM2Model(torch.nn.Module):
use_multimask_token_for_obj_ptr (bool): Whether to use multimask tokens for object pointers.
iou_prediction_use_sigmoid (bool): Whether to use sigmoid to restrict IoU prediction to [0-1].
memory_temporal_stride_for_eval (int): Memory bank's temporal stride during evaluation.
add_all_frames_to_correct_as_cond (bool): Whether to append frames with correction clicks to conditioning
frame list.
non_overlap_masks_for_mem_enc (bool): Whether to apply non-overlapping constraints on object masks in
memory encoder during evaluation.
use_obj_ptrs_in_encoder (bool): Whether to cross-attend to object pointers from other frames in the encoder.
@ -216,6 +215,9 @@ class SAM2Model(torch.nn.Module):
the encoder.
proj_tpos_enc_in_obj_ptrs (bool): Whether to add an extra linear projection layer for temporal positional
encoding in object pointers.
use_signed_tpos_enc_to_obj_ptrs (bool): whether to use signed distance (instead of unsigned absolute distance)
in the temporal positional encoding in the object pointers, only relevant when both `use_obj_ptrs_in_encoder=True`
and `add_tpos_enc_to_obj_ptrs=True`.
only_obj_ptrs_in_the_past_for_eval (bool): Whether to only attend to object pointers in the past
during evaluation.
pred_obj_scores (bool): Whether to predict if there is an object in the frame.
@ -223,6 +225,7 @@ class SAM2Model(torch.nn.Module):
fixed_no_obj_ptr (bool): Whether to have a fixed no-object pointer when there is no object present.
soft_no_obj_ptr (bool): Whether to mix in no-object pointer softly for easier recovery and error mitigation.
use_mlp_for_obj_ptr_proj (bool): Whether to use MLP for object pointer projection.
no_obj_embed_spatial (bool): Whether to add a no-object embedding to the spatial memory of frames predicted to contain no object.
sam_mask_decoder_extra_args (Dict | None): Extra arguments for constructing the SAM mask decoder.
compile_image_encoder (bool): Whether to compile the image encoder for faster inference.
@ -253,6 +256,7 @@ class SAM2Model(torch.nn.Module):
if proj_tpos_enc_in_obj_ptrs:
assert add_tpos_enc_to_obj_ptrs # these options need to be used together
self.proj_tpos_enc_in_obj_ptrs = proj_tpos_enc_in_obj_ptrs
self.use_signed_tpos_enc_to_obj_ptrs = use_signed_tpos_enc_to_obj_ptrs
self.only_obj_ptrs_in_the_past_for_eval = only_obj_ptrs_in_the_past_for_eval
# Part 2: memory attention to condition current frame's visual features
@ -309,9 +313,12 @@ class SAM2Model(torch.nn.Module):
self.no_obj_ptr = torch.nn.Parameter(torch.zeros(1, self.hidden_dim))
trunc_normal_(self.no_obj_ptr, std=0.02)
self.use_mlp_for_obj_ptr_proj = use_mlp_for_obj_ptr_proj
self.no_obj_embed_spatial = None
if no_obj_embed_spatial:
self.no_obj_embed_spatial = torch.nn.Parameter(torch.zeros(1, self.mem_dim))
trunc_normal_(self.no_obj_embed_spatial, std=0.02)
self._build_sam_heads()
self.add_all_frames_to_correct_as_cond = add_all_frames_to_correct_as_cond
self.max_cond_frames_in_attn = max_cond_frames_in_attn
# Model compilation
@ -533,8 +540,6 @@ class SAM2Model(torch.nn.Module):
if self.pred_obj_scores:
# Allow *soft* no obj ptr, unlike for masks
if self.soft_no_obj_ptr:
# Only hard possible with gt
assert not self.teacher_force_obj_scores_for_mem
lambda_is_obj_appearing = object_score_logits.sigmoid()
else:
lambda_is_obj_appearing = is_obj_appearing.float()
@ -647,6 +652,7 @@ class SAM2Model(torch.nn.Module):
if self.num_maskmem == 0: # Disable memory and skip fusion
return current_vision_feats[-1].permute(1, 2, 0).view(B, C, H, W)
num_obj_ptr_tokens = 0
tpos_sign_mul = -1 if track_in_reverse else 1
# Step 1: condition the visual features of the current frame on previous memories
if not is_init_cond_frame:
# Retrieve the memories encoded with the maskmem backbone
@ -664,7 +670,7 @@ class SAM2Model(torch.nn.Module):
# the earliest one has t_pos=1 and the latest one has t_pos=self.num_maskmem-1
# We also allow taking the memory frame non-consecutively (with r>1), in which case
# we take (self.num_maskmem - 2) frames among every r-th frames plus the last frame.
r = self.memory_temporal_stride_for_eval
r = 1 if self.training else self.memory_temporal_stride_for_eval
for t_pos in range(1, self.num_maskmem):
t_rel = self.num_maskmem - t_pos # how many frames before current frame
if t_rel == 1:
@ -718,7 +724,14 @@ class SAM2Model(torch.nn.Module):
ptr_cond_outputs = selected_cond_outputs
pos_and_ptrs = [
# Temporal pos encoding contains how far away each pointer is from current frame
(abs(frame_idx - t), out["obj_ptr"])
(
(
(frame_idx - t) * tpos_sign_mul
if self.use_signed_tpos_enc_to_obj_ptrs
else abs(frame_idx - t)
),
out["obj_ptr"],
)
for t, out in ptr_cond_outputs.items()
]
# Add up to (max_obj_ptrs_in_encoder - 1) non-conditioning frames before current frame
@ -787,6 +800,7 @@ class SAM2Model(torch.nn.Module):
current_vision_feats,
feat_sizes,
pred_masks_high_res,
object_score_logits,
is_mask_from_pts,
):
"""Encodes frame features and masks into a new memory representation for video segmentation."""
@ -819,10 +833,17 @@ class SAM2Model(torch.nn.Module):
)
maskmem_features = maskmem_out["vision_features"]
maskmem_pos_enc = maskmem_out["vision_pos_enc"]
# add a no-object embedding to the spatial memory to indicate that the frame
# is predicted to be occluded (i.e. no object is appearing in the frame)
if self.no_obj_embed_spatial is not None:
is_obj_appearing = (object_score_logits > 0).float()
maskmem_features += (1 - is_obj_appearing[..., None, None]) * self.no_obj_embed_spatial[
..., None, None
].expand(*maskmem_features.shape)
return maskmem_features, maskmem_pos_enc
def track_step(
def _track_step(
self,
frame_idx,
is_init_cond_frame,
@ -833,15 +854,7 @@ class SAM2Model(torch.nn.Module):
mask_inputs,
output_dict,
num_frames,
track_in_reverse=False, # tracking in reverse time order (for demo usage)
# Whether to run the memory encoder on the predicted masks. Sometimes we might want
# to skip the memory encoder with `run_mem_encoder=False`. For example,
# in demo we might call `track_step` multiple times for each user click,
# and only encode the memory when the user finalizes their clicks. And in ablation
# settings like SAM training on static images, we don't need the memory encoder.
run_mem_encoder=True,
# The previously predicted SAM mask logits (which can be fed together with new clicks in demo).
prev_sam_mask_logits=None,
prev_sam_mask_logits,
):
"""Performs a single tracking step, updating object masks and memory features based on current frame inputs."""
current_out = {"point_inputs": point_inputs, "mask_inputs": mask_inputs}
@ -861,7 +874,7 @@ class SAM2Model(torch.nn.Module):
sam_outputs = self._use_mask_as_output(pix_feat, high_res_features, mask_inputs)
else:
# fused the visual feature with previous memory features in the memory bank
pix_feat_with_mem = self._prepare_memory_conditioned_features(
pix_feat = self._prepare_memory_conditioned_features(
frame_idx=frame_idx,
is_init_cond_frame=is_init_cond_frame,
current_vision_feats=current_vision_feats[-1:],
@ -880,12 +893,78 @@ class SAM2Model(torch.nn.Module):
mask_inputs = prev_sam_mask_logits
multimask_output = self._use_multimask(is_init_cond_frame, point_inputs)
sam_outputs = self._forward_sam_heads(
backbone_features=pix_feat_with_mem,
backbone_features=pix_feat,
point_inputs=point_inputs,
mask_inputs=mask_inputs,
high_res_features=high_res_features,
multimask_output=multimask_output,
)
return current_out, sam_outputs, high_res_features, pix_feat
def _encode_memory_in_output(
self,
current_vision_feats,
feat_sizes,
point_inputs,
run_mem_encoder,
high_res_masks,
object_score_logits,
current_out,
):
"""
Run the memory encoder on the predicted mask and store the resulting memory in `current_out`.

When `run_mem_encoder` is truthy and `self.num_maskmem > 0`, the predicted high-resolution masks are passed to
`self._encode_new_memory` and the returned pair is written to `current_out["maskmem_features"]` and
`current_out["maskmem_pos_enc"]`, so the new memory feature can be used in future frames. Otherwise both keys
are set to None.

Args:
    current_vision_feats: Forwarded unchanged to `_encode_new_memory` as `current_vision_feats`.
    feat_sizes: Forwarded unchanged to `_encode_new_memory` as `feat_sizes`.
    point_inputs: Only tested against None here; the result is passed on as `is_mask_from_pts`.
    run_mem_encoder: Flag that enables the memory encoder for this call.
    high_res_masks: Predicted masks, passed to `_encode_new_memory` as `pred_masks_high_res`.
    object_score_logits: Forwarded unchanged to `_encode_new_memory` as `object_score_logits`.
    current_out: Mapping for the current frame's outputs; updated in place.

Returns:
    None. The results are written into `current_out`.
"""
if run_mem_encoder and self.num_maskmem > 0:
# Plain alias: the masks reach the memory encoder unmodified.
high_res_masks_for_mem_enc = high_res_masks
maskmem_features, maskmem_pos_enc = self._encode_new_memory(
current_vision_feats=current_vision_feats,
feat_sizes=feat_sizes,
pred_masks_high_res=high_res_masks_for_mem_enc,
object_score_logits=object_score_logits,
is_mask_from_pts=(point_inputs is not None),
)
current_out["maskmem_features"] = maskmem_features
current_out["maskmem_pos_enc"] = maskmem_pos_enc
else:
# Memory encoding skipped: the keys are still set so they always exist in `current_out`.
current_out["maskmem_features"] = None
current_out["maskmem_pos_enc"] = None
def track_step(
self,
frame_idx,
is_init_cond_frame,
current_vision_feats,
current_vision_pos_embeds,
feat_sizes,
point_inputs,
mask_inputs,
output_dict,
num_frames,
track_in_reverse=False, # tracking in reverse time order (for demo usage)
# Whether to run the memory encoder on the predicted masks. Sometimes we might want
# to skip the memory encoder with `run_mem_encoder=False`. For example,
# in demo we might call `track_step` multiple times for each user click,
# and only encode the memory when the user finalizes their clicks. And in ablation
# settings like SAM training on static images, we don't need the memory encoder.
run_mem_encoder=True,
# The previously predicted SAM mask logits (which can be fed together with new clicks in demo).
prev_sam_mask_logits=None,
):
"""Performs a single tracking step, updating object masks and memory features based on current frame inputs."""
current_out, sam_outputs, _, _ = self._track_step(
frame_idx,
is_init_cond_frame,
current_vision_feats,
current_vision_pos_embeds,
feat_sizes,
point_inputs,
mask_inputs,
output_dict,
num_frames,
track_in_reverse,
prev_sam_mask_logits,
)
(
_,
_,
@ -893,28 +972,28 @@ class SAM2Model(torch.nn.Module):
low_res_masks,
high_res_masks,
obj_ptr,
_,
object_score_logits,
) = sam_outputs
current_out["pred_masks"] = low_res_masks
current_out["pred_masks_high_res"] = high_res_masks
current_out["obj_ptr"] = obj_ptr
if not self.training:
# Only add this in inference (to avoid unused param in activation checkpointing;
# it's mainly used in the demo to encode spatial memories w/ consolidated masks)
current_out["object_score_logits"] = object_score_logits
# Finally run the memory encoder on the predicted mask to encode
# it into a new memory feature (that can be used in future frames)
if run_mem_encoder and self.num_maskmem > 0:
high_res_masks_for_mem_enc = high_res_masks
maskmem_features, maskmem_pos_enc = self._encode_new_memory(
current_vision_feats=current_vision_feats,
feat_sizes=feat_sizes,
pred_masks_high_res=high_res_masks_for_mem_enc,
is_mask_from_pts=(point_inputs is not None),
)
current_out["maskmem_features"] = maskmem_features
current_out["maskmem_pos_enc"] = maskmem_pos_enc
else:
current_out["maskmem_features"] = None
current_out["maskmem_pos_enc"] = None
self._encode_memory_in_output(
current_vision_feats,
feat_sizes,
point_inputs,
run_mem_encoder,
high_res_masks,
object_score_logits,
current_out,
)
return current_out

@ -478,7 +478,7 @@ class Predictor(BasePredictor):
results = []
for masks, orig_img, img_path in zip([pred_masks], orig_imgs, self.batch[0]):
if len(masks) == 0:
masks = None
masks, pred_bboxes = None, torch.zeros((0, 6), device=pred_masks.device)
else:
masks = ops.scale_masks(masks[None].float(), orig_img.shape[:2], padding=False)[0]
masks = masks > self.model.mask_threshold # to bool

@ -224,10 +224,10 @@ class AutoBackend(nn.Module):
import tensorrt as trt # noqa https://developer.nvidia.com/nvidia-tensorrt-download
except ImportError:
if LINUX:
check_requirements("tensorrt>7.0.0,<=10.1.0")
check_requirements("tensorrt>7.0.0,!=10.1.0")
import tensorrt as trt # noqa
check_version(trt.__version__, ">=7.0.0", hard=True)
check_version(trt.__version__, "<=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239")
check_version(trt.__version__, "!=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239")
if device.type == "cpu":
device = torch.device("cuda:0")
Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
@ -343,6 +343,7 @@ class AutoBackend(nn.Module):
model_path=w,
experimental_delegates=[load_delegate(delegate, options={"device": device})],
)
device = "cpu" # Required, otherwise PyTorch will try to use the wrong device
else: # TFLite
LOGGER.info(f"Loading {w} for TensorFlow Lite inference...")
interpreter = Interpreter(model_path=w) # load TFLite model

@ -168,7 +168,6 @@ class ParkingManagement(BaseSolution):
Examples:
>>> from ultralytics.solutions import ParkingManagement
>>> parking_manager = ParkingManagement(model="yolov8n.pt", json_file="parking_regions.json")
>>> results = parking_manager(source="parking_lot_video.mp4")
>>> print(f"Occupied spaces: {parking_manager.pr_info['Occupancy']}")
>>> print(f"Available spaces: {parking_manager.pr_info['Available']}")
"""

@ -1,16 +1,13 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from collections import defaultdict
from pathlib import Path
import cv2
from ultralytics import YOLO
from ultralytics.utils import LOGGER, yaml_load
from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_SOL_DICT, LOGGER
from ultralytics.utils.checks import check_imshow, check_requirements
DEFAULT_SOL_CFG_PATH = Path(__file__).resolve().parents[1] / "cfg/solutions/default.yaml"
class BaseSolution:
"""
@ -55,15 +52,18 @@ class BaseSolution:
self.Point = Point
# Load config and update with args
self.CFG = yaml_load(DEFAULT_SOL_CFG_PATH)
self.CFG.update(kwargs)
LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}")
DEFAULT_SOL_DICT.update(kwargs)
DEFAULT_CFG_DICT.update(kwargs)
self.CFG = {**DEFAULT_SOL_DICT, **DEFAULT_CFG_DICT}
LOGGER.info(f"Ultralytics Solutions: ✅ {DEFAULT_SOL_DICT}")
self.region = self.CFG["region"] # Store region data for other classes usage
self.line_width = self.CFG["line_width"] # Store line_width for usage
self.line_width = (
self.CFG["line_width"] if self.CFG["line_width"] is not None else 2
) # Store line_width for usage
# Load Model and store classes names
self.model = YOLO(self.CFG["model"])
self.model = YOLO(self.CFG["model"] if self.CFG["model"] else "yolov8n.pt")
self.names = self.model.names
# Initialize environment and region setup

@ -38,6 +38,7 @@ FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLO
ASSETS = ROOT / "assets" # default images
DEFAULT_CFG_PATH = ROOT / "cfg/default.yaml"
DEFAULT_SOL_CFG_PATH = ROOT / "cfg/solutions/default.yaml" # Ultralytics solutions yaml path
NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLO multiprocessing threads
AUTOINSTALL = str(os.getenv("YOLO_AUTOINSTALL", True)).lower() == "true" # global auto-install mode
VERBOSE = str(os.getenv("YOLO_VERBOSE", True)).lower() == "true" # global verbose mode
@ -508,6 +509,7 @@ def yaml_print(yaml_file: Union[str, Path, dict]) -> None:
# Default configuration
DEFAULT_CFG_DICT = yaml_load(DEFAULT_CFG_PATH)
DEFAULT_SOL_DICT = yaml_load(DEFAULT_SOL_CFG_PATH) # Ultralytics solutions configuration
for k, v in DEFAULT_CFG_DICT.items():
if isinstance(v, str) and v.lower() == "none":
DEFAULT_CFG_DICT[k] = None
@ -566,12 +568,16 @@ def is_kaggle():
def is_jupyter():
"""
Check if the current script is running inside a Jupyter Notebook. Verified on Colab, Jupyterlab, Kaggle, Paperspace.
Check if the current script is running inside a Jupyter Notebook.
Returns:
(bool): True if running inside a Jupyter Notebook, False otherwise.
Note:
- Only works on Colab and Kaggle, other environments like Jupyterlab and Paperspace are not reliably detectable.
- "get_ipython" in globals() method suffers false positives when IPython package installed manually.
"""
return "get_ipython" in globals()
return IS_COLAB or IS_KAGGLE
def is_docker() -> bool:
@ -799,10 +805,10 @@ def get_user_config_dir(sub_dir="Ultralytics"):
PROC_DEVICE_MODEL = read_device_model() # is_jetson() and is_raspberrypi() depend on this constant
ONLINE = is_online()
IS_COLAB = is_colab()
IS_KAGGLE = is_kaggle()
IS_DOCKER = is_docker()
IS_JETSON = is_jetson()
IS_JUPYTER = is_jupyter()
IS_KAGGLE = is_kaggle()
IS_PIP_PACKAGE = is_pip_package()
IS_RASPBERRYPI = is_raspberrypi()
GIT_DIR = get_git_dir()
@ -1193,7 +1199,7 @@ class SettingsManager(JSONDict):
"neptune": True, # Neptune integration
"raytune": True, # Ray Tune integration
"tensorboard": True, # TensorBoard logging
"wandb": True, # Weights & Biases logging
"wandb": False, # Weights & Biases logging
"vscode_msg": True, # VSCode messaging
}

@ -1,6 +1,7 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from ultralytics.utils import LOGGER, RANK, SETTINGS, TESTS_RUNNING, ops
from ultralytics.utils.metrics import ClassifyMetrics, DetMetrics, OBBMetrics, PoseMetrics, SegmentMetrics
try:
assert not TESTS_RUNNING # do not log pytest
@ -16,8 +17,11 @@ try:
COMET_SUPPORTED_TASKS = ["detect"]
# Names of plots created by Ultralytics that are logged to Comet
EVALUATION_PLOT_NAMES = "F1_curve", "P_curve", "R_curve", "PR_curve", "confusion_matrix"
CONFUSION_MATRIX_PLOT_NAMES = "confusion_matrix", "confusion_matrix_normalized"
EVALUATION_PLOT_NAMES = "F1_curve", "P_curve", "R_curve", "PR_curve"
LABEL_PLOT_NAMES = "labels", "labels_correlogram"
SEGMENT_METRICS_PLOT_PREFIX = "Box", "Mask"
POSE_METRICS_PLOT_PREFIX = "Box", "Pose"
_comet_image_prediction_count = 0
@ -86,7 +90,7 @@ def _create_experiment(args):
"max_image_predictions": _get_max_image_predictions_to_log(),
}
)
experiment.log_other("Created from", "yolov8")
experiment.log_other("Created from", "ultralytics")
except Exception as e:
LOGGER.warning(f"WARNING ⚠ Comet installed but not initialized correctly, not logging this run. {e}")
@ -274,11 +278,31 @@ def _log_image_predictions(experiment, validator, curr_step):
def _log_plots(experiment, trainer):
"""Logs evaluation plots and label plots for the experiment."""
plot_filenames = [trainer.save_dir / f"{plots}.png" for plots in EVALUATION_PLOT_NAMES]
_log_images(experiment, plot_filenames, None)
label_plot_filenames = [trainer.save_dir / f"{labels}.jpg" for labels in LABEL_PLOT_NAMES]
_log_images(experiment, label_plot_filenames, None)
plot_filenames = None
if isinstance(trainer.validator.metrics, SegmentMetrics) and trainer.validator.metrics.task == "segment":
plot_filenames = [
trainer.save_dir / f"{prefix}{plots}.png"
for plots in EVALUATION_PLOT_NAMES
for prefix in SEGMENT_METRICS_PLOT_PREFIX
]
elif isinstance(trainer.validator.metrics, PoseMetrics):
plot_filenames = [
trainer.save_dir / f"{prefix}{plots}.png"
for plots in EVALUATION_PLOT_NAMES
for prefix in POSE_METRICS_PLOT_PREFIX
]
elif isinstance(trainer.validator.metrics, DetMetrics) or isinstance(trainer.validator.metrics, OBBMetrics):
plot_filenames = [trainer.save_dir / f"{plots}.png" for plots in EVALUATION_PLOT_NAMES]
if plot_filenames is not None:
_log_images(experiment, plot_filenames, None)
confusion_matrix_filenames = [trainer.save_dir / f"{plots}.png" for plots in CONFUSION_MATRIX_PLOT_NAMES]
_log_images(experiment, confusion_matrix_filenames, None)
if not isinstance(trainer.validator.metrics, ClassifyMetrics):
label_plot_filenames = [trainer.save_dir / f"{labels}.jpg" for labels in LABEL_PLOT_NAMES]
_log_images(experiment, label_plot_filenames, None)
def _log_model(experiment, trainer):
@ -307,9 +331,6 @@ def on_train_epoch_end(trainer):
experiment.log_metrics(trainer.label_loss_items(trainer.tloss, prefix="train"), step=curr_step, epoch=curr_epoch)
if curr_epoch == 1:
_log_images(experiment, trainer.save_dir.glob("train_batch*.jpg"), curr_step)
def on_fit_epoch_end(trainer):
"""Logs model assets at the end of each epoch."""
@ -356,6 +377,8 @@ def on_train_end(trainer):
_log_confusion_matrix(experiment, trainer, curr_step, curr_epoch)
_log_image_predictions(experiment, trainer.validator, curr_step)
_log_images(experiment, trainer.save_dir.glob("train_batch*.jpg"), curr_step)
_log_images(experiment, trainer.save_dir.glob("val_batch*.jpg"), curr_step)
experiment.end()
global _comet_image_prediction_count

@ -137,17 +137,19 @@ def on_train_end(trainer):
if trainer.best.exists():
art.add_file(trainer.best)
wb.run.log_artifact(art, aliases=["best"])
for curve_name, curve_values in zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results):
x, y, x_title, y_title = curve_values
_plot_curve(
x,
y,
names=list(trainer.validator.metrics.names.values()),
id=f"curves/{curve_name}",
title=curve_name,
x_title=x_title,
y_title=y_title,
)
# Check if we actually have plots to save
if trainer.args.plots:
for curve_name, curve_values in zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results):
x, y, x_title, y_title = curve_values
_plot_curve(
x,
y,
names=list(trainer.validator.metrics.names.values()),
id=f"curves/{curve_name}",
title=curve_name,
x_title=x_title,
y_title=y_title,
)
wb.run.finish() # required or run continues on dashboard

@ -335,7 +335,7 @@ def check_font(font="Arial.ttf"):
return file
def check_python(minimum: str = "3.8.0", hard: bool = True, verbose: bool = True) -> bool:
def check_python(minimum: str = "3.8.0", hard: bool = True, verbose: bool = False) -> bool:
"""
Check current python version against the required minimum version.
@ -688,7 +688,7 @@ def check_amp(model):
im = ASSETS / "bus.jpg" # image to check
prefix = colorstr("AMP: ")
LOGGER.info(f"{prefix}running Automatic Mixed Precision (AMP) checks with YOLO11n...")
LOGGER.info(f"{prefix}running Automatic Mixed Precision (AMP) checks...")
warning_msg = "Setting 'amp=True'. If you experience zero-mAP or NaN losses you can disable AMP with amp=False."
try:
from ultralytics import YOLO
@ -696,11 +696,13 @@ def check_amp(model):
assert amp_allclose(YOLO("yolo11n.pt"), im)
LOGGER.info(f"{prefix}checks passed ✅")
except ConnectionError:
LOGGER.warning(f"{prefix}checks skipped ⚠, offline and unable to download YOLO11n. {warning_msg}")
LOGGER.warning(
f"{prefix}checks skipped ⚠. " f"Offline and unable to download YOLO11n for AMP checks. {warning_msg}"
)
except (AttributeError, ModuleNotFoundError):
LOGGER.warning(
f"{prefix}checks skipped ⚠. "
f"Unable to load YOLO11n due to possible Ultralytics package modifications. {warning_msg}"
f"Unable to load YOLO11n for AMP checks due to possible Ultralytics package modifications. {warning_msg}"
)
except AssertionError:
LOGGER.warning(

Loading…
Cancel
Save