diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e0eff35cb8..9d7031e98e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -146,6 +146,8 @@ jobs: flags: Benchmarks env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + - name: Prune uv Cache + run: uv cache prune --ci - name: Benchmark Summary run: | cat benchmarks.log @@ -203,6 +205,8 @@ jobs: flags: Tests env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + - name: Prune uv Cache + run: uv cache prune --ci GPU: if: github.repository == 'ultralytics/ultralytics' && (github.event_name != 'workflow_dispatch' || github.event.inputs.gpu == 'true') diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 5b0c7a96d3..40b7efc186 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -23,13 +23,15 @@ on: inputs: publish_docs: description: "Publish live to https://docs.ultralytics.com" - default: "true" + default: true type: boolean jobs: Docs: if: github.repository == 'ultralytics/ultralytics' runs-on: ubuntu-latest + env: + GITHUB_REF: ${{ github.head_ref || github.ref }} steps: - name: Git config run: | @@ -40,7 +42,7 @@ jobs: with: repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }} token: ${{ secrets._GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - ref: ${{ github.head_ref || github.ref }} + ref: ${{ env.GITHUB_REF }} fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v5 @@ -56,7 +58,7 @@ jobs: continue-on-error: true run: | python docs/build_reference.py - git pull origin ${{ github.head_ref || github.ref }} + git pull origin "$GITHUB_REF" git add . git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token if ! git diff --staged --quiet; then @@ -75,7 +77,7 @@ jobs: continue-on-error: true if: always() run: | - git pull origin ${{ github.head_ref || github.ref }} + git pull origin "$GITHUB_REF" git add --update # only add updated files git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token if ! 
git diff --staged --quiet; then diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 84f39b7ebd..c98b876265 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -9,7 +9,7 @@ on: types: [opened, edited] discussion: types: [created] - pull_request_target: + pull_request: branches: [main] types: [opened, closed, synchronize, review_requested] @@ -20,7 +20,7 @@ jobs: - name: Run Ultralytics Formatting uses: ultralytics/actions@main with: - token: ${{ secrets._GITHUB_TOKEN }} # note GITHUB_TOKEN automatically generated + token: ${{ secrets._GITHUB_TOKEN || secrets.GITHUB_TOKEN}} labels: true # autolabel issues and PRs python: true # format Python code and docstrings prettier: true # format YAML, JSON, Markdown and CSS diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index b1dd1e4350..ba736048d1 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -13,34 +13,23 @@ on: description: Publish to PyPI jobs: - publish: + check: if: github.repository == 'ultralytics/ultralytics' && github.actor == 'glenn-jocher' - name: Publish runs-on: ubuntu-latest - environment: # for GitHub Deployments tab - name: Release - PyPI - url: https://pypi.org/p/ultralytics permissions: - id-token: write # for PyPI trusted publishing + contents: write + outputs: + increment: ${{ steps.check_pypi.outputs.increment }} + current_tag: ${{ steps.check_pypi.outputs.current_tag }} + previous_tag: ${{ steps.check_pypi.outputs.previous_tag }} steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - token: ${{ secrets._GITHUB_TOKEN }} - - name: Git config - run: | - git config --global user.name "UltralyticsAssistant" - git config --global user.email "web@ultralytics.com" - - name: Set up Python environment - uses: actions/setup-python@v5 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: "3.x" - cache: "pip" - - name: Install dependencies - run: | - python -m pip install --upgrade pip wheel - pip install ultralytics-actions build twine toml - - name: Check PyPI version + - uses: astral-sh/setup-uv@v4 + - run: uv pip install --system --no-cache ultralytics-actions + - id: check_pypi shell: python run: | import os @@ -51,56 +40,82 @@ jobs: os.system(f'echo "previous_tag=v{online_version}" >> $GITHUB_OUTPUT') if publish: print('Ready to publish new version to PyPI โœ….') - id: check_pypi - - name: Build package - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' - run: python -m build - - name: Publish to PyPI - continue-on-error: true - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' - uses: pypa/gh-action-pypi-publish@release/v1 - - name: Publish new tag - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' - run: | - git tag -a "${{ steps.check_pypi.outputs.current_tag }}" -m "$(git log -1 --pretty=%B)" # i.e. 
"v0.1.2 commit message" - git push origin "${{ steps.check_pypi.outputs.current_tag }}" - - name: Publish new release - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' + - name: Tag and Release + if: steps.check_pypi.outputs.increment == 'True' env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - GITHUB_TOKEN: ${{ secrets._GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} CURRENT_TAG: ${{ steps.check_pypi.outputs.current_tag }} PREVIOUS_TAG: ${{ steps.check_pypi.outputs.previous_tag }} - run: ultralytics-actions-summarize-release - shell: bash + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + git config --global user.name "UltralyticsAssistant" + git config --global user.email "web@ultralytics.com" + git tag -a "$CURRENT_TAG" -m "$(git log -1 --pretty=%B)" + git push origin "$CURRENT_TAG" + ultralytics-actions-summarize-release + uv cache prune --ci + + build: + needs: check + if: needs.check.outputs.increment == 'True' + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + - uses: astral-sh/setup-uv@v4 + - run: uv pip install --system --no-cache build + - run: python -m build + - uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ + - run: uv cache prune --ci + + publish: + needs: [check, build] + if: needs.check.outputs.increment == 'True' + runs-on: ubuntu-latest + environment: # for GitHub Deployments tab + name: Release - PyPI + url: https://pypi.org/p/ultralytics + permissions: + id-token: write # for PyPI trusted publishing + steps: + - uses: actions/download-artifact@v4 + with: + name: dist + path: dist/ + - uses: pypa/gh-action-pypi-publish@release/v1 + + notify: + needs: [check, publish] + if: always() && needs.check.outputs.increment == 'True' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 - name: Extract PR Details env: - GH_TOKEN: ${{ secrets._GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - # Check if the event is a pull request or pull_request_target - if [ "${{ github.event_name }}" = "pull_request" ] || [ "${{ github.event_name }}" = "pull_request_target" ]; then - PR_NUMBER=${{ github.event.pull_request.number }} - PR_TITLE=$(gh pr view $PR_NUMBER --json title --jq '.title') - else - # Use gh to find the PR associated with the commit - COMMIT_SHA=${{ github.event.after }} - PR_JSON=$(gh pr list --search "${COMMIT_SHA}" --state merged --json number,title --jq '.[0]') - PR_NUMBER=$(echo $PR_JSON | jq -r '.number') - PR_TITLE=$(echo $PR_JSON | jq -r '.title') - fi - echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV - echo "PR_TITLE=$PR_TITLE" >> $GITHUB_ENV - - - name: Notify on Slack (Success) - if: success() && github.event_name == 'push' && steps.check_pypi.outputs.increment == 'True' + PR_JSON=$(gh pr list --search "${GITHUB_SHA}" --state merged --json number,title --jq '.[0]') + PR_NUMBER=$(echo "${PR_JSON}" | jq -r '.number') + PR_TITLE=$(echo "${PR_JSON}" | jq -r '.title') + echo "PR_NUMBER=${PR_NUMBER}" >> "${GITHUB_ENV}" + echo "PR_TITLE=${PR_TITLE}" >> "${GITHUB_ENV}" + - name: Notify Success + if: needs.publish.result == 'success' && github.event_name == 'push' uses: slackapi/slack-github-action@v2.0.0 with: webhook-type: incoming-webhook webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} payload: | - text: " GitHub Actions success for ${{ github.workflow }} โœ…\n\n\n*Repository:* https://github.com/${{ github.repository 
}}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW `${{ github.repository }} ${{ steps.check_pypi.outputs.current_tag }}` pip package published ๐Ÿ˜ƒ\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n" - - name: Notify on Slack (Failure) - if: failure() + text: " GitHub Actions success for ${{ github.workflow }} โœ…\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW `${{ github.repository }} ${{ needs.check.outputs.current_tag }}` pip package published ๐Ÿ˜ƒ\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n" + - name: Notify Failure + if: needs.publish.result != 'success' uses: slackapi/slack-github-action@v2.0.0 with: webhook-type: incoming-webhook diff --git a/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md b/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md index 154ec7a893..cffeb22350 100644 --- a/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md +++ b/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md @@ -61,7 +61,7 @@ OpenVINO's multi-device mode simplifies scaling throughput by automatically bala Optimizing Ultralytics YOLO models for latency and throughput with OpenVINO can significantly enhance your application's performance. By carefully applying the strategies outlined in this guide, developers can ensure their models run efficiently, meeting the demands of various deployment scenarios. Remember, the choice between optimizing for latency or throughput depends on your specific application needs and the characteristics of the deployment environment. -For more detailed technical information and the latest updates, refer to the [OpenVINO documentation](https://docs.openvino.ai/latest/index.html) and [Ultralytics YOLO repository](https://github.com/ultralytics/ultralytics). These resources provide in-depth guides, tutorials, and community support to help you get the most out of your deep learning models. +For more detailed technical information and the latest updates, refer to the [OpenVINO documentation](https://docs.openvino.ai/2024/index.html) and [Ultralytics YOLO repository](https://github.com/ultralytics/ultralytics). These resources provide in-depth guides, tutorials, and community support to help you get the most out of your deep learning models. --- diff --git a/docs/en/guides/raspberry-pi.md b/docs/en/guides/raspberry-pi.md index ef41d0f8cf..5e173db25b 100644 --- a/docs/en/guides/raspberry-pi.md +++ b/docs/en/guides/raspberry-pi.md @@ -2,6 +2,7 @@ comments: true description: Learn how to deploy Ultralytics YOLO11 on Raspberry Pi with our comprehensive guide. Get performance benchmarks, setup instructions, and best practices. keywords: Ultralytics, YOLO11, Raspberry Pi, setup, guide, benchmarks, computer vision, object detection, NCNN, Docker, camera modules +benchmark_version: 8.3.39 --- # Quick Start Guide: Raspberry Pi with Ultralytics YOLO11 @@ -144,7 +145,7 @@ We have only included benchmarks for YOLO11n and YOLO11s models because other mo
    YOLO11 benchmarks on RPi 5
-    Benchmarked with Ultralytics v8.3.39
+    Benchmarked with Ultralytics {{ benchmark_version }}
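Benchmark tables like the detailed comparison below are generated with the Ultralytics benchmarking utility; a minimal sketch, assuming a Raspberry Pi CPU target and the small COCO8 dataset (the guide's own "Reproduce Our Results" section gives the exact commands):

```python
from ultralytics.utils.benchmarks import benchmark

# Benchmark YOLO11n across the supported export formats on the Raspberry Pi CPU
benchmark(model="yolo11n.pt", data="coco8.yaml", imgsz=640, half=False, device="cpu")
```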
### Detailed Comparison Table

@@ -183,7 +184,7 @@ The below table represents the benchmark results for two different models (YOLO1
| MNN | ✅ | 36.2 | 0.7409 | 273.032 |
| NCNN | ✅ | 36.2 | 0.7419 | 194.858 |

-    Benchmarked with Ultralytics `v8.3.39`
+    Benchmarked with Ultralytics {{ benchmark_version }}

## Reproduce Our Results

diff --git a/docs/en/guides/triton-inference-server.md b/docs/en/guides/triton-inference-server.md
index 0151cc078d..67d419bf52 100644
--- a/docs/en/guides/triton-inference-server.md
+++ b/docs/en/guides/triton-inference-server.md
@@ -48,6 +48,16 @@ from ultralytics import YOLO
# Load a model
model = YOLO("yolo11n.pt")  # load an official model

+# Retrieve metadata during export
+metadata = []
+
+
+def export_cb(exporter):
+    metadata.append(exporter.metadata)
+
+
+model.add_callback("on_export_end", export_cb)
+
# Export the model
onnx_file = model.export(format="onnx", dynamic=True)
```
@@ -107,7 +117,13 @@ The Triton Model Repository is a storage location where Triton can access and lo
            }
          }
        }
-        """
+        parameters {
+          key: "metadata"
+          value: {
+            string_value: "%s"
+          }
+        }
+        """ % metadata[0]

    with open(triton_model_path / "config.pbtxt", "w") as f:
        f.write(data)

diff --git a/docs/en/macros/augmentation-args.md b/docs/en/macros/augmentation-args.md
index b4d6c9df6d..bee27ddd0d 100644
--- a/docs/en/macros/augmentation-args.md
+++ b/docs/en/macros/augmentation-args.md
@@ -13,7 +13,7 @@
| `bgr` | `float` | `0.0` | `0.0 - 1.0` | Flips the image channels from RGB to BGR with the specified probability, useful for increasing robustness to incorrect channel ordering. |
| `mosaic` | `float` | `1.0` | `0.0 - 1.0` | Combines four training images into one, simulating different scene compositions and object interactions. Highly effective for complex scene understanding. |
| `mixup` | `float` | `0.0` | `0.0 - 1.0` | Blends two images and their labels, creating a composite image. Enhances the model's ability to generalize by introducing label noise and visual variability. |
-| `copy_paste` | `float` | `0.0` | `0.0 - 1.0` | Copies objects from one image and pastes them onto another, useful for increasing object instances and learning object occlusion. |
+| `copy_paste` | `float` | `0.0` | `0.0 - 1.0` | Copies and pastes objects across images, useful for increasing object instances and learning object occlusion. Requires segmentation labels. |
| `copy_paste_mode` | `str` | `flip` | - | Copy-Paste augmentation method selection among the options of (`"flip"`, `"mixup"`). |
| `auto_augment` | `str` | `randaugment` | - | Automatically applies a predefined augmentation policy (`randaugment`, `autoaugment`, `augmix`), optimizing for classification tasks by diversifying the visual features. |
| `erasing` | `float` | `0.4` | `0.0 - 0.9` | Randomly erases a portion of the image during classification training, encouraging the model to focus on less obvious features for recognition. |

diff --git a/docs/en/macros/predict-args.md b/docs/en/macros/predict-args.md
index b40650d49a..f1960687f9 100644
--- a/docs/en/macros/predict-args.md
+++ b/docs/en/macros/predict-args.md
@@ -6,6 +6,7 @@
| `imgsz` | `int or tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection [accuracy](https://www.ultralytics.com/glossary/accuracy) and processing speed. |
| `half` | `bool` | `False` | Enables half-[precision](https://www.ultralytics.com/glossary/precision) (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. |
| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0` or `0`). Allows users to select between CPU, a specific GPU, or other compute devices for model execution. |
+| `batch` | `int` | `1` | Specifies the batch size for inference (only works when the source is [a directory, video file or `.txt` file](/modes/predict.md/#inference-sources)). A larger batch size can provide higher throughput, shortening the total amount of time required for inference. |
| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. |
| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. |
| `stream_buffer` | `bool` | `False` | Determines whether to queue incoming frames for video streams. If `False`, old frames get dropped to accommodate new frames (optimized for real-time applications). If `True`, queues new frames in a buffer, ensuring no frames get skipped, but will cause latency if inference FPS is lower than stream FPS. |

diff --git a/docs/en/reference/utils/ops.md b/docs/en/reference/utils/ops.md
index b62ba7b4eb..ab6cafbca8 100644
--- a/docs/en/reference/utils/ops.md
+++ b/docs/en/reference/utils/ops.md
@@ -129,4 +129,8 @@ keywords: Ultralytics, utility operations, non-max suppression, bounding box tra
## ::: ultralytics.utils.ops.clean_str
+
+## ::: ultralytics.utils.ops.empty_like
+
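The new reference entry above documents the `empty_like` helper that this patch adds in `ultralytics/utils/ops.py` (see the hunk near the end of the diff); a minimal usage sketch of its behavior, with illustrative values:

```python
import numpy as np
import torch

from ultralytics.utils.ops import empty_like

boxes_np = np.array([[10, 20, 30, 40]], dtype=np.int32)
boxes_pt = torch.tensor([[10, 20, 30, 40]], dtype=torch.float16)

# Returns an uninitialized buffer with the same shape as the input, always float32,
# in the same framework as the input (NumPy in -> NumPy out, torch in -> torch out)
print(empty_like(boxes_np).shape, empty_like(boxes_np).dtype)  # (1, 4) float32
print(empty_like(boxes_pt).shape, empty_like(boxes_pt).dtype)  # torch.Size([1, 4]) torch.float32
```

The box-conversion functions (`xyxy2xywh`, `xywh2xyxy`, and friends) in this patch switch to this helper in place of the inline `torch.empty_like`/`np.empty_like` branch.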
diff --git a/docs/en/solutions/index.md b/docs/en/solutions/index.md index 243fbf2757..dd7b61d252 100644 --- a/docs/en/solutions/index.md +++ b/docs/en/solutions/index.md @@ -29,7 +29,6 @@ Here's our curated list of Ultralytics solutions that can be used to create awes - [Parking Management](../guides/parking-management.md) ๐Ÿš€: Organize and direct vehicle flow in parking areas with YOLO11, optimizing space utilization and user experience. - [Analytics](../guides/analytics.md) ๐Ÿ“Š: Conduct comprehensive data analysis to discover patterns and make informed decisions, leveraging YOLO11 for descriptive, predictive, and prescriptive analytics. - [Live Inference with Streamlit](../guides/streamlit-live-inference.md) ๐Ÿš€: Leverage the power of YOLO11 for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) directly through your web browser with a user-friendly Streamlit interface. -- [Live Inference with Streamlit](../guides/streamlit-live-inference.md) ๐Ÿš€: Leverage the power of YOLO11 for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) directly through your web browser with a user-friendly Streamlit interface. - [Track Objects in Zone](../guides/trackzone.md) ๐ŸŽฏ NEW: Learn how to track objects within specific zones of video frames using YOLO11 for precise and efficient monitoring. ## Solutions Usage @@ -39,7 +38,7 @@ Here's our curated list of Ultralytics solutions that can be used to create awes `yolo SOLUTIONS SOLUTION_NAME ARGS` - **SOLUTIONS** is a required keyword. - - **SOLUTION_NAME** (optional) is one of: `['count', 'heatmap', 'queue', 'speed', 'workout', 'analytics']`. + - **SOLUTION_NAME** (optional) is one of: `['count', 'heatmap', 'queue', 'speed', 'workout', 'analytics', 'trackzone']`. - **ARGS** (optional) are custom `arg=value` pairs, such as `show_in=True`, to override default settings. === "CLI" diff --git a/docs/en/yolov5/tutorials/model_export.md b/docs/en/yolov5/tutorials/model_export.md index a3a945c1e1..5cee3fdde9 100644 --- a/docs/en/yolov5/tutorials/model_export.md +++ b/docs/en/yolov5/tutorials/model_export.md @@ -31,7 +31,7 @@ YOLOv5 inference is officially supported in 11 formats: | [PyTorch](https://pytorch.org/) | - | `yolov5s.pt` | | [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov5s.torchscript` | | [ONNX](https://onnx.ai/) | `onnx` | `yolov5s.onnx` | -| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov5s_openvino_model/` | +| [OpenVINO](https://docs.openvino.ai/2024/index.html) | `openvino` | `yolov5s_openvino_model/` | | [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov5s.engine` | | [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov5s.mlmodel` | | [TensorFlow SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov5s_saved_model/` | diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb index 5eadcf3e60..457334a689 100644 --- a/examples/tutorial.ipynb +++ b/examples/tutorial.ipynb @@ -45,6 +45,25 @@ "" ] }, + { + "cell_type": "markdown", + "source": [ + "
\n",
    "Watch: How to Train Ultralytics YOLO11 Model on Custom Dataset using Google Colab Notebook 🚀\n",
    "
" + ], + "metadata": { + "id": "DXHD1DC5M64G" + } + }, { "cell_type": "markdown", "metadata": { diff --git a/mkdocs.yml b/mkdocs.yml index 283d52f3d9..e84bca25a5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -69,8 +69,9 @@ theme: - content.tabs.link # all code tabs change simultaneously # Customization -copyright: ยฉ 2024 Ultralytics Inc. All rights reserved. +copyright: ยฉ 2024 Ultralytics Inc. All rights reserved. extra: # version: + homepage: https://www.ultralytics.com/ # provider: mike # version drop-down menu robots: robots.txt analytics: @@ -90,7 +91,7 @@ extra: # version: - icon: fontawesome/brands/python link: https://pypi.org/project/ultralytics/ - icon: fontawesome/brands/discord - link: https://ultralytics.com/discord + link: https://discord.com/invite/ultralytics - icon: fontawesome/brands/reddit link: https://reddit.com/r/ultralytics diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 601d1bb363..d178a35d8f 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -__version__ = "8.3.40" +__version__ = "8.3.48" import os diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index e4c239f3d4..b36418fbf5 100644 --- a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -77,7 +77,7 @@ SOLUTIONS_HELP_MSG = f""" yolo solutions SOLUTION ARGS - Where SOLUTION (optional) is one of {list(SOLUTION_MAP.keys())} + Where SOLUTION (optional) is one of {list(SOLUTION_MAP.keys())[:-1]} ARGS (optional) are any number of custom 'arg=value' pairs like 'show_in=True' that override defaults at https://docs.ultralytics.com/usage/cfg @@ -96,7 +96,7 @@ SOLUTIONS_HELP_MSG = f""" 5. Generate analytical graphs yolo solutions analytics analytics_type="pie" - 6. Track Objects Within Specific Zones + 6. Track objects within specific zones yolo solutions trackzone source="path/to/video/file.mp4" region=[(150, 150), (1130, 150), (1130, 570), (150, 570)] """ CLI_HELP_MSG = f""" @@ -125,7 +125,7 @@ CLI_HELP_MSG = f""" yolo streamlit-predict 6. Ultralytics solutions usage - yolo solutions count or in {list(SOLUTION_MAP.keys())} source="path/to/video/file.mp4" + yolo solutions count or in {list(SOLUTION_MAP.keys())[1:-1]} source="path/to/video/file.mp4" 7. 
Run special commands: yolo help diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index c0e29e7e1c..ae84cab9a1 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -73,7 +73,7 @@ from ultralytics.data import build_dataloader from ultralytics.data.dataset import YOLODataset from ultralytics.data.utils import check_cls_dataset, check_det_dataset from ultralytics.nn.autobackend import check_class_names, default_class_names -from ultralytics.nn.modules import C2f, Detect, RTDETRDecoder +from ultralytics.nn.modules import C2f, Classify, Detect, RTDETRDecoder from ultralytics.nn.tasks import DetectionModel, SegmentationModel, WorldModel from ultralytics.utils import ( ARM64, @@ -287,6 +287,8 @@ class Exporter: model = FXModel(model) for m in model.modules(): + if isinstance(m, Classify): + m.export = True if isinstance(m, (Detect, RTDETRDecoder)): # includes all Detect subclasses like Segment, Pose, OBB m.dynamic = self.args.dynamic m.export = True diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index 874613d2f1..db8d87ebc2 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -136,6 +136,7 @@ class Model(nn.Module): # Check if Triton Server model elif self.is_triton_model(model): self.model_name = self.model = model + self.overrides["task"] = task or "detect" # set `task=detect` if not explicitly set return # Load or create new YOLO model diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py index c28e1895d0..c5250166e9 100644 --- a/ultralytics/engine/predictor.py +++ b/ultralytics/engine/predictor.py @@ -155,7 +155,7 @@ class BasePredictor: same_shapes = len({x.shape for x in im}) == 1 letterbox = LetterBox( self.imgsz, - auto=same_shapes and (self.model.pt or getattr(self.model, "dynamic", False)), + auto=same_shapes and (self.model.pt or (getattr(self.model, "dynamic", False) and not self.model.imx)), stride=self.model.stride, ) return [letterbox(image=x) for x in im] diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py index 540d1007a3..b657ef7051 100644 --- a/ultralytics/models/sam/predict.py +++ b/ultralytics/models/sam/predict.py @@ -1105,7 +1105,7 @@ class SAM2VideoPredictor(SAM2Predictor): for obj_temp_output_dict in temp_output_dict_per_obj.values(): temp_frame_inds.update(obj_temp_output_dict[storage_key].keys()) consolidated_frame_inds[storage_key].update(temp_frame_inds) - # consolidate the temprary output across all objects on this frame + # consolidate the temporary output across all objects on this frame for frame_idx in temp_frame_inds: consolidated_out = self._consolidate_temp_output_across_obj( frame_idx, is_cond=is_cond, run_mem_encoder=True diff --git a/ultralytics/models/yolo/classify/predict.py b/ultralytics/models/yolo/classify/predict.py index b75a194984..385f75bc0d 100644 --- a/ultralytics/models/yolo/classify/predict.py +++ b/ultralytics/models/yolo/classify/predict.py @@ -53,7 +53,8 @@ class ClassificationPredictor(BasePredictor): if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + preds = preds[0] if isinstance(preds, (list, tuple)) else preds return [ - Results(orig_img, path=img_path, names=self.model.names, probs=pred.softmax(0)) + Results(orig_img, path=img_path, names=self.model.names, probs=pred) for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]) ] diff --git a/ultralytics/models/yolo/classify/val.py 
b/ultralytics/models/yolo/classify/val.py index e54f04118a..67333f2604 100644 --- a/ultralytics/models/yolo/classify/val.py +++ b/ultralytics/models/yolo/classify/val.py @@ -71,6 +71,10 @@ class ClassificationValidator(BaseValidator): self.metrics.confusion_matrix = self.confusion_matrix self.metrics.save_dir = self.save_dir + def postprocess(self, preds): + """Preprocesses the classification predictions.""" + return preds[0] if isinstance(preds, (list, tuple)) else preds + def get_stats(self): """Returns a dictionary of metrics obtained by processing targets and predictions.""" self.metrics.process(self.targets, self.pred) diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py index 60b9f6389a..b6df3753ec 100644 --- a/ultralytics/nn/autobackend.py +++ b/ultralytics/nn/autobackend.py @@ -96,7 +96,7 @@ class AutoBackend(nn.Module): Initialize the AutoBackend for inference. Args: - weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'. + weights (str | torch.nn.Module): Path to the model weights file or a module instance. Defaults to 'yolo11n.pt'. device (torch.device): Device to run the model on. Defaults to CPU. dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False. data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional. @@ -462,6 +462,7 @@ class AutoBackend(nn.Module): from ultralytics.utils.triton import TritonRemoteModel model = TritonRemoteModel(w) + metadata = model.metadata # Any other format (unsupported) else: @@ -700,8 +701,7 @@ class AutoBackend(nn.Module): # print(type(x), len(x)) if isinstance(x, (list, tuple)) else print(type(x), x.shape) # debug shapes if isinstance(y, (list, tuple)): if len(self.names) == 999 and (self.task == "segment" or len(y) == 2): # segments and names not defined - ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0) # index of protos, boxes - nc = y[ib].shape[1] - y[ip].shape[3] - 4 # y = (1, 160, 160, 32), (1, 116, 8400) + nc = y[0].shape[1] - y[1].shape[1] - 4 # y = (1, 32, 160, 160), (1, 116, 8400) self.names = {i: f"class{i}" for i in range(nc)} return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y] else: diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py index 25964ac2e5..0afb5fd16f 100644 --- a/ultralytics/nn/modules/head.py +++ b/ultralytics/nn/modules/head.py @@ -282,6 +282,8 @@ class Pose(Detect): class Classify(nn.Module): """YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2).""" + export = False # export mode + def __init__(self, c1, c2, k=1, s=1, p=None, g=1): """Initializes YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.""" super().__init__() @@ -296,7 +298,10 @@ class Classify(nn.Module): if isinstance(x, list): x = torch.cat(x, 1) x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1))) - return x + if self.training: + return x + y = x.softmax(1) # get final output + return y if self.export else (y, x) class WorldDetect(Detect): diff --git a/ultralytics/solutions/heatmap.py b/ultralytics/solutions/heatmap.py index c9dd808798..bf2903b7a9 100644 --- a/ultralytics/solutions/heatmap.py +++ b/ultralytics/solutions/heatmap.py @@ -27,12 +27,8 @@ class Heatmap(ObjectCounter): Examples: >>> from ultralytics.solutions import Heatmap >>> heatmap = Heatmap(model="yolov8n.pt", colormap=cv2.COLORMAP_JET) - >>> results = heatmap("path/to/video.mp4") - >>> for result in results: - ... print(result.speed) # Print inference speed - ... 
cv2.imshow("Heatmap", result.plot()) - ... if cv2.waitKey(1) & 0xFF == ord("q"): - ... break + >>> frame = cv2.imread("frame.jpg") + >>> processed_frame = heatmap.generate_heatmap(frame) """ def __init__(self, **kwargs): diff --git a/ultralytics/solutions/queue_management.py b/ultralytics/solutions/queue_management.py index ca0acb14f8..043bd371d5 100644 --- a/ultralytics/solutions/queue_management.py +++ b/ultralytics/solutions/queue_management.py @@ -27,10 +27,13 @@ class QueueManager(BaseSolution): display_output: Displays the processed output. Examples: - >>> queue_manager = QueueManager(source="video.mp4", region=[100, 100, 200, 200, 300, 300]) - >>> for frame in video_stream: - ... processed_frame = queue_manager.process_queue(frame) - ... cv2.imshow("Queue Management", processed_frame) + >>> cap = cv2.VideoCapture("Path/to/video/file.mp4") + >>> queue_manager = QueueManager(region=[100, 100, 200, 200, 300, 300]) + >>> while cap.isOpened(): + >>> success, im0 = cap.read() + >>> if not success: + >>> break + >>> out = queue.process_queue(im0) """ def __init__(self, **kwargs): diff --git a/ultralytics/trackers/utils/matching.py b/ultralytics/trackers/utils/matching.py index b062d938e5..4a3a420af4 100644 --- a/ultralytics/trackers/utils/matching.py +++ b/ultralytics/trackers/utils/matching.py @@ -13,7 +13,7 @@ try: except (ImportError, AssertionError, AttributeError): from ultralytics.utils.checks import check_requirements - check_requirements("lapx>=0.5.2") # update to lap package from https://github.com/rathaROG/lapx + check_requirements("lap>=0.5.12") # https://github.com/gatagat/lap import lap diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py index 3a8201a54e..fe858eb023 100644 --- a/ultralytics/utils/checks.py +++ b/ultralytics/utils/checks.py @@ -669,8 +669,22 @@ def check_amp(model): from ultralytics.utils.torch_utils import autocast device = next(model.parameters()).device # get model device + prefix = colorstr("AMP: ") if device.type in {"cpu", "mps"}: return False # AMP only used on CUDA devices + else: + # GPUs that have issues with AMP + pattern = re.compile( + r"(nvidia|geforce|quadro|tesla).*?(1660|1650|1630|t400|t550|t600|t1000|t1200|t2000|k40m)", re.IGNORECASE + ) + + gpu = torch.cuda.get_device_name(device) + if bool(pattern.search(gpu)): + LOGGER.warning( + f"{prefix}checks failed โŒ. AMP training on {gpu} GPU may cause " + f"NaN losses or zero-mAP results, so AMP will be disabled during training." + ) + return False def amp_allclose(m, im): """All close FP32 vs AMP results.""" @@ -683,7 +697,6 @@ def check_amp(model): return a.shape == b.shape and torch.allclose(a, b.float(), atol=0.5) # close to 0.5 absolute tolerance im = ASSETS / "bus.jpg" # image to check - prefix = colorstr("AMP: ") LOGGER.info(f"{prefix}running Automatic Mixed Precision (AMP) checks...") warning_msg = "Setting 'amp=True'. If you experience zero-mAP or NaN losses you can disable AMP with amp=False." 
try: diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py index 7395286899..c6557df4c0 100644 --- a/ultralytics/utils/loss.py +++ b/ultralytics/utils/loss.py @@ -604,6 +604,7 @@ class v8ClassificationLoss: def __call__(self, preds, batch): """Compute the classification loss between predictions and true labels.""" + preds = preds[1] if isinstance(preds, (list, tuple)) else preds loss = F.cross_entropy(preds, batch["cls"], reduction="mean") loss_items = loss.detach() return loss, loss_items diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py index ac53546ed1..9a05b3a8b5 100644 --- a/ultralytics/utils/ops.py +++ b/ultralytics/utils/ops.py @@ -400,7 +400,7 @@ def xyxy2xywh(x): y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format. """ assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}" - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy + y = empty_like(x) # faster than clone/copy y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center y[..., 2] = x[..., 2] - x[..., 0] # width @@ -420,7 +420,7 @@ def xywh2xyxy(x): y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format. """ assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}" - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy + y = empty_like(x) # faster than clone/copy xy = x[..., :2] # centers wh = x[..., 2:] / 2 # half width-height y[..., :2] = xy - wh # top left xy @@ -443,7 +443,7 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box. 
""" assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}" - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy + y = empty_like(x) # faster than clone/copy y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw # bottom right x @@ -469,7 +469,7 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): if clip: x = clip_boxes(x, (h - eps, w - eps)) assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}" - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy + y = empty_like(x) # faster than clone/copy y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w # x center y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h # y center y[..., 2] = (x[..., 2] - x[..., 0]) / w # width @@ -625,8 +625,9 @@ def resample_segments(segments, n=1000): """ for i, s in enumerate(segments): s = np.concatenate((s, s[0:1, :]), axis=0) - x = np.linspace(0, len(s) - 1, n) + x = np.linspace(0, len(s) - 1, n - len(s) if len(s) < n else n) xp = np.arange(len(s)) + x = np.insert(x, np.searchsorted(x, xp), xp) if len(s) < n else x segments[i] = ( np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)], dtype=np.float32).reshape(2, -1).T ) # segment xy @@ -837,3 +838,10 @@ def clean_str(s): (str): a string with special characters replaced by an underscore _ """ return re.sub(pattern="[|@#!ยกยท$โ‚ฌ%&()=?ยฟ^*;:,ยจยด><+]", repl="_", string=s) + + +def empty_like(x): + """Creates empty torch.Tensor or np.ndarray with same shape as input and float32 dtype.""" + return ( + torch.empty_like(x, dtype=torch.float32) if isinstance(x, torch.Tensor) else np.empty_like(x, dtype=np.float32) + ) diff --git a/ultralytics/utils/triton.py b/ultralytics/utils/triton.py index 3f873a6faf..cc53ed5714 100644 --- a/ultralytics/utils/triton.py +++ b/ultralytics/utils/triton.py @@ -66,6 +66,7 @@ class TritonRemoteModel: self.np_input_formats = [type_map[x] for x in self.input_formats] self.input_names = [x["name"] for x in config["input"]] self.output_names = [x["name"] for x in config["output"]] + self.metadata = eval(config.get("parameters", {}).get("metadata", {}).get("string_value", "None")) def __call__(self, *inputs: np.ndarray) -> List[np.ndarray]: """