Merge branch 'main' into mkdocs-exclude

mkdocs-exclude
Glenn Jocher authored 6 months ago, committed by GitHub
commit ad1d146bac
  1. .github/workflows/ci.yaml (50 changed lines)
  2. .github/workflows/cla.yml (2 changed lines)
  3. .github/workflows/docs.yml (99 changed lines)
  4. .github/workflows/publish.yml (155 changed lines)
  5. README.md (2 changed lines)
  6. README.zh-CN.md (2 changed lines)
  7. docs/build_docs.py (8 changed lines)
  8. docs/en/datasets/classify/cifar10.md (20 changed lines)
  9. docs/en/datasets/classify/imagewoof.md (29 changed lines)
  10. docs/en/datasets/detect/roboflow-100.md (2 changed lines)
  11. docs/en/datasets/detect/visdrone.md (40 changed lines)
  12. docs/en/datasets/explorer/explorer.ipynb (4 changed lines)
  13. docs/en/datasets/segment/crack-seg.md (32 changed lines)
  14. docs/en/datasets/segment/package-seg.md (30 changed lines)
  15. docs/en/guides/kfold-cross-validation.md (6 changed lines)
  16. docs/en/guides/parking-management.md (11 changed lines)
  17. docs/en/guides/sahi-tiled-inference.md (2 changed lines)
  18. docs/en/hub/datasets.md (12 changed lines)
  19. docs/en/hub/index.md (1 changed line)
  20. docs/en/index.md (1 changed line)
  21. docs/en/integrations/ibm-watsonx.md (2 changed lines)
  22. docs/en/integrations/jupyterlab.md (5 changed lines)
  23. docs/en/integrations/tensorboard.md (139 changed lines)
  24. docs/en/robots.txt (2 changed lines)
  25. examples/YOLOv8-Action-Recognition/action_recognition.py (1 changed line)
  26. examples/YOLOv8-CPP-Inference/main.cpp (2 changed lines)
  27. examples/YOLOv8-ONNXRuntime-CPP/CMakeLists.txt (3 changed lines)
  28. examples/YOLOv8-ONNXRuntime-CPP/README.md (13 changed lines)
  29. examples/YOLOv8-ONNXRuntime/main.py (2 changed lines)
  30. examples/YOLOv8-OpenCV-int8-tflite-Python/main.py (7 changed lines)
  31. examples/YOLOv8-Region-Counter/yolov8_region_counter.py (2 changed lines)
  32. examples/YOLOv8-Segmentation-ONNXRuntime-Python/main.py (22 changed lines)
  33. examples/heatmaps.ipynb (2 changed lines)
  34. examples/hub.ipynb (2 changed lines)
  35. examples/object_counting.ipynb (2 changed lines)
  36. examples/object_tracking.ipynb (2 changed lines)
  37. examples/tutorial.ipynb (2 changed lines)
  38. mkdocs.yml (4 changed lines)
  39. pyproject.toml (5 changed lines)
  40. tests/test_solutions.py (4 changed lines)
  41. ultralytics/__init__.py (2 changed lines)
  42. ultralytics/cfg/__init__.py (30 changed lines)
  43. ultralytics/data/annotator.py (2 changed lines)
  44. ultralytics/data/augment.py (116 changed lines)
  45. ultralytics/data/base.py (6 changed lines)
  46. ultralytics/data/converter.py (15 changed lines)
  47. ultralytics/data/explorer/explorer.py (30 changed lines)
  48. ultralytics/data/loaders.py (2 changed lines)
  49. ultralytics/data/split_dota.py (14 changed lines)
  50. ultralytics/data/utils.py (16 changed lines)
  51. ultralytics/engine/exporter.py (6 changed lines)
  52. ultralytics/engine/model.py (105 changed lines)
  53. ultralytics/engine/predictor.py (2 changed lines)
  54. ultralytics/engine/results.py (60 changed lines)
  55. ultralytics/engine/trainer.py (19 changed lines)
  56. ultralytics/engine/tuner.py (15 changed lines)
  57. ultralytics/engine/validator.py (8 changed lines)
  58. ultralytics/hub/__init__.py (10 changed lines)
  59. ultralytics/hub/auth.py (8 changed lines)
  60. ultralytics/hub/session.py (50 changed lines)
  61. ultralytics/models/fastsam/model.py (23 changed lines)
  62. ultralytics/models/fastsam/predict.py (4 changed lines)
  63. ultralytics/models/fastsam/utils.py (1 changed line)
  64. ultralytics/models/nas/model.py (8 changed lines)
  65. ultralytics/models/nas/predict.py (3 changed lines)
  66. ultralytics/models/nas/val.py (2 changed lines)
  67. ultralytics/models/rtdetr/predict.py (2 changed lines)
  68. ultralytics/models/rtdetr/train.py (2 changed lines)
  69. ultralytics/models/rtdetr/val.py (2 changed lines)
  70. ultralytics/models/sam/model.py (22 changed lines)
  71. ultralytics/models/sam/modules/decoders.py (11 changed lines)
  72. ultralytics/models/sam/modules/sam.py (10 changed lines)
  73. ultralytics/models/sam/modules/tiny_encoder.py (2 changed lines)
  74. ultralytics/models/sam/modules/transformer.py (2 changed lines)
  75. ultralytics/models/sam/modules/utils.py (2 changed lines)
  76. ultralytics/models/sam/predict.py (20 changed lines)
  77. ultralytics/models/utils/loss.py (46 changed lines)
  78. ultralytics/models/utils/ops.py (6 changed lines)
  79. ultralytics/models/yolo/classify/predict.py (2 changed lines)
  80. ultralytics/models/yolo/classify/train.py (2 changed lines)
  81. ultralytics/models/yolo/classify/val.py (2 changed lines)
  82. ultralytics/models/yolo/detect/predict.py (2 changed lines)
  83. ultralytics/models/yolo/detect/train.py (2 changed lines)
  84. ultralytics/models/yolo/detect/val.py (2 changed lines)
  85. ultralytics/models/yolo/model.py (8 changed lines)
  86. ultralytics/models/yolo/obb/predict.py (2 changed lines)
  87. ultralytics/models/yolo/obb/train.py (2 changed lines)
  88. ultralytics/models/yolo/obb/val.py (4 changed lines)
  89. ultralytics/models/yolo/pose/predict.py (2 changed lines)
  90. ultralytics/models/yolo/pose/train.py (2 changed lines)
  91. ultralytics/models/yolo/pose/val.py (2 changed lines)
  92. ultralytics/models/yolo/segment/predict.py (2 changed lines)
  93. ultralytics/models/yolo/segment/train.py (2 changed lines)
  94. ultralytics/models/yolo/segment/val.py (2 changed lines)
  95. ultralytics/models/yolo/world/train.py (2 changed lines)
  96. ultralytics/nn/autobackend.py (4 changed lines)
  97. ultralytics/nn/modules/__init__.py (4 changed lines)
  98. ultralytics/nn/modules/block.py (28 changed lines)
  99. ultralytics/nn/modules/conv.py (4 changed lines)
  100. ultralytics/nn/modules/head.py (12 changed lines)
Some files were not shown because too many files have changed in this diff.

@@ -38,56 +38,6 @@ on:
type: boolean
jobs:
Docs:
# TODO: break this job out into its own action with pull_request_target to allow changes to user forks
if: github.repository == 'ultralytics/ultralytics'
runs-on: macos-14
steps:
- name: Checkout Repository
uses: actions/checkout@v4
with:
repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
token: ${{ secrets.GITHUB_TOKEN }}
ref: ${{ github.head_ref || github.ref }}
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.x"
cache: "pip" # caching pip dependencies
- name: Install Dependencies
run: pip install tqdm mkdocs-material "mkdocstrings[python]" mkdocs-jupyter mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin mkdocs-exclude
- name: Update Docs Reference Section
run: python docs/build_reference.py
- name: Commit and Push Reference Section Changes
run: |
git pull origin ${{ github.head_ref || github.ref }}
git add .
git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token
git config --global user.name "UltralyticsAssistant"
git config --global user.email "web@ultralytics.com"
if ! git diff --staged --quiet; then
git commit -m "Auto-update Ultralytics Docs Reference Section by https://ultralytics.com/actions"
git push
else
echo "No changes to commit"
fi
- name: Build Docs and Check for Warnings
run: python docs/build_docs.py
- name: Commit and Push Docs changes
continue-on-error: true
if: always() && github.event_name == 'pull_request'
run: |
git pull origin ${{ github.head_ref || github.ref }}
git add --update # only add updated files
git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token
if ! git diff --staged --quiet; then
git commit -m "Auto-update Ultralytics Docs by https://ultralytics.com/actions"
git push
else
echo "No changes to commit"
fi
HUB:
if: github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && github.event.inputs.hub == 'true'))
runs-on: ${{ matrix.os }}

@@ -26,7 +26,7 @@ jobs:
steps:
- name: CLA Assistant
if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I sign the CLA') || github.event_name == 'pull_request_target'
uses: contributor-assistant/github-action@v2.4.0
uses: contributor-assistant/github-action@v2.5.1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Must be repository secret PAT

@@ -0,0 +1,99 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Test and publish docs to https://docs.ultralytics.com
# Ignores the following Docs rules to match Google-style docstrings:
# D100: Missing docstring in public module
# D104: Missing docstring in public package
# D203: 1 blank line required before class docstring
# D205: 1 blank line required between summary line and description
# D212: Multi-line docstring summary should start at the first line
# D213: Multi-line docstring summary should start at the second line
# D401: First line of docstring should be in imperative mood
# D406: Section name should end with a newline
# D407: Missing dashed underline after section
# D413: Missing blank line after last section
name: Publish Docs
on:
push:
branches: [main]
pull_request_target:
branches: [main]
workflow_dispatch:
jobs:
Docs:
if: github.repository == 'ultralytics/ultralytics'
runs-on: macos-14
steps:
- name: Git config
run: |
git config --global user.name "UltralyticsAssistant"
git config --global user.email "web@ultralytics.com"
- name: Checkout Repository
uses: actions/checkout@v4
with:
repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
token: ${{ secrets.GITHUB_TOKEN }}
ref: ${{ github.head_ref || github.ref }}
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.x"
cache: "pip" # caching pip dependencies
- name: Install Dependencies
run: pip install ruff black tqdm mkdocs-material "mkdocstrings[python]" mkdocs-jupyter mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin
- name: Update Docs Reference Section and Push Changes
if: github.event_name == 'pull_request_target'
run: |
python docs/build_reference.py
ruff check --fix --fix-unsafe --select D --ignore=D100,D104,D203,D205,D212,D213,D401,D406,D407,D413 . || true
git pull origin ${{ github.head_ref || github.ref }}
git add .
git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token
if ! git diff --staged --quiet; then
git commit -m "Auto-update Ultralytics Docs Reference by https://ultralytics.com/actions"
git push
else
echo "No changes to commit"
fi
- name: Ruff checks
run: ruff check --select D --ignore=D100,D104,D203,D205,D212,D213,D401,D406,D407,D413 .
- name: Build Docs and Check for Warnings
run: |
export JUPYTER_PLATFORM_DIRS=1
python docs/build_docs.py
- name: Commit and Push Docs changes
continue-on-error: true
if: always() && github.event_name == 'pull_request_target'
run: |
git pull origin ${{ github.head_ref || github.ref }}
git add --update # only add updated files
git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token
if ! git diff --staged --quiet; then
git commit -m "Auto-update Ultralytics Docs by https://ultralytics.com/actions"
git push
else
echo "No changes to commit"
fi
- name: Publish Docs to https://docs.ultralytics.com
if: github.event_name == 'push'
env:
PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
INDEXNOW_KEY: ${{ secrets.INDEXNOW_KEY_DOCS }}
run: |
git clone https://github.com/ultralytics/docs.git docs-repo
cd docs-repo
git checkout gh-pages || git checkout -b gh-pages
rm -rf *
cp -R ../site/* .
echo "$INDEXNOW_KEY" > "$INDEXNOW_KEY.txt"
git add .
if git diff --staged --quiet; then
echo "No changes to commit"
else
LATEST_HASH=$(git rev-parse --short=7 HEAD)
git commit -m "Update Docs for 'ultralytics ${{ steps.check_pypi.outputs.version }} - $LATEST_HASH'"
git push https://$PERSONAL_ACCESS_TOKEN@github.com/ultralytics/docs.git gh-pages
fi

@@ -1,7 +1,7 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Publish pip package to PyPI https://pypi.org/project/ultralytics/ and Docs to https://docs.ultralytics.com
# Publish pip package to PyPI https://pypi.org/project/ultralytics/
name: Publish to PyPI and Deploy Docs
name: Publish to PyPI
on:
push:
@@ -11,20 +11,17 @@ on:
pypi:
type: boolean
description: Publish to PyPI
docs:
type: boolean
description: Deploy Docs
jobs:
publish:
if: github.repository == 'ultralytics/ultralytics' && github.actor == 'glenn-jocher'
name: Publish
runs-on: macos-14
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: "0" # pulls all commits (needed for correct last-updated dates in Docs)
token: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} # use your PAT here
- name: Git config
run: |
git config --global user.name "UltralyticsAssistant"
@@ -32,45 +29,72 @@ jobs:
- name: Set up Python environment
uses: actions/setup-python@v5
with:
python-version: "3.11"
python-version: "3.x"
cache: "pip" # caching pip dependencies
- name: Install dependencies
run: |
python -m pip install --upgrade pip wheel build twine
pip install -e ".[dev]" openai --extra-index-url https://download.pytorch.org/whl/cpu
python -m pip install --upgrade pip wheel
pip install openai requests build twine toml
- name: Check PyPI version
shell: python
run: |
import os
import ultralytics
from ultralytics.utils.checks import check_latest_pypi_version
latest_pypi_version = check_latest_pypi_version()
v_local = tuple(map(int, ultralytics.__version__.split('.')))
v_pypi = tuple(map(int, latest_pypi_version.split('.')))
print(f'Local version is {v_local}')
print(f'PyPI version is {v_pypi}')
d = [a - b for a, b in zip(v_local, v_pypi)] # diff
increment_patch = (d[0] == d[1] == 0) and (0 < d[2] < 3) # publish if patch version increments by 1 or 2
increment_minor = (d[0] == 0) and (d[1] == 1) and v_local[2] == 0 # publish if minor version increments
increment = increment_patch or increment_minor
os.system(f'echo "increment={increment}" >> $GITHUB_OUTPUT')
os.system(f'echo "version={ultralytics.__version__}" >> $GITHUB_OUTPUT')
os.system(f'echo "previous_version={latest_pypi_version}" >> $GITHUB_OUTPUT')
if increment:
print('Local version is higher than PyPI version. Publishing new version to PyPI ✅.')
import requests
import toml
# Load version and package name from pyproject.toml
pyproject = toml.load('pyproject.toml')
package_name = pyproject['project']['name']
local_version = pyproject['project'].get('version', 'dynamic')
# If version is dynamic, extract it from the specified file
if local_version == 'dynamic':
version_attr = pyproject['tool']['setuptools']['dynamic']['version']['attr']
module_path, attr_name = version_attr.rsplit('.', 1)
with open(f"{module_path.replace('.', '/')}/__init__.py") as f:
local_version = next(line.split('=')[1].strip().strip("'\"") for line in f if line.startswith(attr_name))
print(f"Local Version: {local_version}")
# Get online version from PyPI
response = requests.get(f"https://pypi.org/pypi/{package_name}/json")
online_version = response.json()['info']['version'] if response.status_code == 200 else None
print(f"Online Version: {online_version or 'Not Found'}")
# Determine if a new version should be published
publish = False
if online_version:
local_ver = tuple(map(int, local_version.split('.')))
online_ver = tuple(map(int, online_version.split('.')))
major_diff = local_ver[0] - online_ver[0]
minor_diff = local_ver[1] - online_ver[1]
patch_diff = local_ver[2] - online_ver[2]
publish = (
(major_diff == 0 and minor_diff == 0 and 0 < patch_diff <= 2) or
(major_diff == 0 and minor_diff == 1 and local_ver[2] == 0) or
(major_diff == 1 and local_ver[1] == 0 and local_ver[2] == 0)
)
else:
publish = True # First release
os.system(f'echo "increment={publish}" >> $GITHUB_OUTPUT')
os.system(f'echo "version={local_version}" >> $GITHUB_OUTPUT')
os.system(f'echo "previous_version={online_version or "N/A"}" >> $GITHUB_OUTPUT')
if publish:
print('Ready to publish new version to PyPI ✅.')
id: check_pypi
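The gating rule in the rewritten check above reduces to a small pure function. Below is a minimal sketch for testing the rule locally; the `should_publish` helper is hypothetical (not part of the workflow) and assumes three-part `major.minor.patch` version strings:

```python
def should_publish(local: str, online: str | None) -> bool:
    """Return True when a release is warranted: patch bump of 1-2, or a clean minor/major bump."""
    if online is None:
        return True  # first release to PyPI
    lv = tuple(map(int, local.split(".")))
    ov = tuple(map(int, online.split(".")))
    major, minor, patch = (a - b for a, b in zip(lv, ov))
    return (
        (major == 0 and minor == 0 and 0 < patch <= 2)
        or (major == 0 and minor == 1 and lv[2] == 0)
        or (major == 1 and lv[1] == 0 and lv[2] == 0)
    )


assert should_publish("8.2.3", "8.2.1")  # patch bump of 2 -> publish
assert should_publish("8.3.0", "8.2.55")  # minor bump with patch reset to 0 -> publish
assert not should_publish("8.2.9", "8.2.3")  # patch jumped by 6 -> hold back
```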
- name: Publish new tag
if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True'
run: |
git tag -a "v${{ steps.check_pypi.outputs.version }}" -m "$(git log -1 --pretty=%B)" || true # i.e. "v0.1.2 commit message"
git push origin "v${{ steps.check_pypi.outputs.version }}" || true
git tag -a "v${{ steps.check_pypi.outputs.version }}" -m "$(git log -1 --pretty=%B)" # i.e. "v0.1.2 commit message"
git push origin "v${{ steps.check_pypi.outputs.version }}"
- name: Publish new release
if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True'
env:
OPENAI_AZURE_API_KEY: ${{ secrets.OPENAI_AZURE_API_KEY }}
OPENAI_AZURE_ENDPOINT: ${{ secrets.OPENAI_AZURE_ENDPOINT }}
OPENAI_AZURE_API_VERSION: ${{ secrets.OPENAI_AZURE_API_VERSION }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }}
CURRENT_TAG: ${{ steps.check_pypi.outputs.version }}
PREVIOUS_TAG: ${{ steps.check_pypi.outputs.previous_version }}
shell: python
@@ -82,26 +106,18 @@ jobs:
import subprocess
# Retrieve environment variables
OPENAI_AZURE_API_KEY = os.getenv('OPENAI_AZURE_API_KEY')
OPENAI_AZURE_ENDPOINT = os.getenv('OPENAI_AZURE_ENDPOINT')
OPENAI_AZURE_API_VERSION = os.getenv('OPENAI_AZURE_API_VERSION')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
CURRENT_TAG = os.getenv('CURRENT_TAG')
PREVIOUS_TAG = os.getenv('PREVIOUS_TAG')
# Check for required environment variables
if not all([OPENAI_AZURE_API_KEY, OPENAI_AZURE_ENDPOINT, OPENAI_AZURE_API_VERSION, GITHUB_TOKEN, CURRENT_TAG, PREVIOUS_TAG]):
print(OPENAI_AZURE_API_KEY)
print(OPENAI_AZURE_ENDPOINT)
print(OPENAI_AZURE_API_VERSION)
print(GITHUB_TOKEN)
print(CURRENT_TAG)
print(PREVIOUS_TAG)
if not all([OPENAI_API_KEY, GITHUB_TOKEN, CURRENT_TAG, PREVIOUS_TAG]):
raise ValueError("One or more required environment variables are missing.")
latest_tag = f"v{CURRENT_TAG}"
previous_tag = f"v{PREVIOUS_TAG}"
repo = 'ultralytics/ultralytics'
repo = os.getenv('GITHUB_REPOSITORY')
headers = {"Authorization": f"token {GITHUB_TOKEN}", "Accept": "application/vnd.github.v3.diff"}
# Get the diff between the tags
@@ -109,29 +125,23 @@ jobs:
response = requests.get(url, headers=headers)
diff = response.text if response.status_code == 200 else f"Failed to get diff: {response.content}"
# Set up client
client = openai.AzureOpenAI(
api_key=OPENAI_AZURE_API_KEY,
api_version=OPENAI_AZURE_API_VERSION,
azure_endpoint=OPENAI_AZURE_ENDPOINT
)
# Get summary
messages = [
{
"role": "system",
"content": "You are an Ultralytics AI assistant skilled in software development and technical communication. Your task is to summarize GitHub releases from Ultralytics in a way that is detailed, accurate, and understandable to both expert developers and non-expert users. Focus on highlighting the key changes and their impact in simple and intuitive terms."
"content": "You are an Ultralytics AI assistant skilled in software development and technical communication. Your task is to summarize GitHub releases in a way that is detailed, accurate, and understandable to both expert developers and non-expert users. Focus on highlighting the key changes and their impact in simple and intuitive terms."
},
{
"role": "user",
"content": f"Summarize the updates made in the [Ultralytics](https://ultralytics.com) '{latest_tag}' tag, focusing on major changes, their purpose, and potential impact. Keep the summary clear and suitable for a broad audience. Add emojis to enliven the summary. Reply directly with a summary along these example guidelines, though feel free to adjust as appropriate:\n\n"
"content": f"Summarize the updates made in the '{latest_tag}' tag, focusing on major changes, their purpose, and potential impact. Keep the summary clear and suitable for a broad audience. Add emojis to enliven the summary. Reply directly with a summary along these example guidelines, though feel free to adjust as appropriate:\n\n"
f"## 🌟 Summary (single-line synopsis)\n"
f"## 📊 Key Changes (bullet points highlighting any major changes)\n"
f"## 🎯 Purpose & Impact (bullet points explaining any benefits and potential impact to users)\n"
f"\n\nHere's the release diff:\n\n{diff[:300000]}",
}
]
completion = client.chat.completions.create(model="gpt-4o-2024-05-13", messages=messages)
client = openai.OpenAI(api_key=OPENAI_API_KEY)
completion = client.chat.completions.create(model="gpt-4o-2024-08-06", messages=messages)
summary = completion.choices[0].message.content.strip()
# Get the latest commit message
@@ -161,37 +171,20 @@ jobs:
run: |
python -m build
python -m twine upload dist/* -u __token__ -p $PYPI_TOKEN
- name: Deploy Docs
continue-on-error: true
if: (github.event_name == 'push' || github.event.inputs.docs == 'true') && github.repository == 'ultralytics/ultralytics' && github.actor == 'glenn-jocher'
env:
PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
INDEXNOW_KEY: ${{ secrets.INDEXNOW_KEY_DOCS }}
run: |
pip install black
export JUPYTER_PLATFORM_DIRS=1
python docs/build_docs.py
git clone https://github.com/ultralytics/docs.git docs-repo
cd docs-repo
git checkout gh-pages || git checkout -b gh-pages
rm -rf *
cp -R ../site/* .
echo "$INDEXNOW_KEY" > "$INDEXNOW_KEY.txt"
git add .
LATEST_HASH=$(git rev-parse --short=7 HEAD)
git commit -m "Update Docs for 'ultralytics ${{ steps.check_pypi.outputs.version }} - $LATEST_HASH'"
git push https://$PERSONAL_ACCESS_TOKEN@github.com/ultralytics/docs.git gh-pages
- name: Extract PR Details
env:
GH_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN || secrets.GITHUB_TOKEN }}
run: |
if [ "${{ github.event_name }}" = "pull_request" ]; then
PR_JSON=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }})
# Check if the event is a pull request or pull_request_target
if [ "${{ github.event_name }}" = "pull_request" ] || [ "${{ github.event_name }}" = "pull_request_target" ]; then
PR_NUMBER=${{ github.event.pull_request.number }}
PR_TITLE=$(echo $PR_JSON | jq -r '.title')
PR_TITLE=$(gh pr view $PR_NUMBER --json title --jq '.title')
else
# Use gh to find the PR associated with the commit
COMMIT_SHA=${{ github.event.after }}
PR_JSON=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" "https://api.github.com/search/issues?q=repo:${{ github.repository }}+is:pr+is:merged+sha:$COMMIT_SHA")
PR_NUMBER=$(echo $PR_JSON | jq -r '.items[0].number')
PR_TITLE=$(echo $PR_JSON | jq -r '.items[0].title')
PR_JSON=$(gh pr list --search "${COMMIT_SHA}" --state merged --json number,title --jq '.[0]')
PR_NUMBER=$(echo $PR_JSON | jq -r '.number')
PR_TITLE=$(echo $PR_JSON | jq -r '.title')
fi
echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV
echo "PR_TITLE=$PR_TITLE" >> $GITHUB_ENV
@@ -200,7 +193,7 @@ jobs:
uses: slackapi/slack-github-action@v1.26.0
with:
payload: |
{"text": "<!channel> GitHub Actions success for ${{ github.workflow }} ✅\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW 'ultralytics ${{ steps.check_pypi.outputs.version }}' pip package published 😃\n*Job Status:* ${{ job.status }}\n*Pull Request:* <https://github.com/${{ github.repository }}/pull/${{ env.PR_NUMBER }}> ${{ env.PR_TITLE }}\n"}
{"text": "<!channel> GitHub Actions success for ${{ github.workflow }} ✅\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW '${{ github.repository }} v${{ steps.check_pypi.outputs.version }}' pip package published 😃\n*Job Status:* ${{ job.status }}\n*Pull Request:* <https://github.com/${{ github.repository }}/pull/${{ env.PR_NUMBER }}> ${{ env.PR_TITLE }}\n"}
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
- name: Notify on Slack (Failure)

@@ -4,7 +4,7 @@
<img width="100%" src="https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png" alt="YOLO Vision banner"></a>
</p>
[中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [हिन्दी](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/) <br>
[中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/) <br>
<div>
<a href="https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml"><img src="https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml/badge.svg" alt="Ultralytics CI"></a>

@@ -4,7 +4,7 @@
<img width="100%" src="https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png" alt="YOLO Vision banner"></a>
</p>
[中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [हिन्दी](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/) <br>
[中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/) <br>
<div>
<a href="https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml"><img src="https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml/badge.svg" alt="Ultralytics CI"></a>

@@ -1,8 +1,8 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
This Python script is designed to automate the building and post-processing of MkDocs documentation, particularly for
projects with multilingual content. It streamlines the workflow for generating localized versions of the documentation
and updating HTML links to ensure they are correctly formatted.
Automates the building and post-processing of MkDocs documentation, particularly for projects with multilingual content.
It streamlines the workflow for generating localized versions of the documentation and updating HTML links to ensure
they are correctly formatted.
Key Features:
- Automated building of MkDocs documentation: The script compiles both the main documentation and
@@ -64,7 +64,6 @@ def prepare_docs_markdown(clone_repos=True):
def update_page_title(file_path: Path, new_title: str):
"""Update the title of an HTML file."""
# Read the content of the file
with open(file_path, encoding="utf-8") as file:
content = file.read()
@@ -153,7 +152,6 @@ def update_markdown_files(md_filepath: Path):
def update_docs_html():
"""Updates titles, edit links, head sections, and converts plaintext links in HTML documentation."""
# Update 404 titles
update_page_title(SITE / "404.html", new_title="Ultralytics Docs - Not Found")

@@ -153,14 +153,18 @@ Each subset comprises images categorized into 10 classes, with their annotations
If you use the CIFAR-10 dataset in your research or development projects, make sure to cite the following paper:
```bibtex
@TECHREPORT{Krizhevsky09learningmultiple,
author={Alex Krizhevsky},
title={Learning multiple layers of features from tiny images},
institution={},
year={2009}
}
```
!!! Quote ""
=== "BibTeX"
```bibtex
@TECHREPORT{Krizhevsky09learningmultiple,
author={Alex Krizhevsky},
title={Learning multiple layers of features from tiny images},
institution={},
year={2009}
}
```
Acknowledging the dataset's creators helps support continued research and development in the field. For more details, see the [citations and acknowledgments](#citations-and-acknowledgments) section.

@@ -59,18 +59,29 @@ ImageWoof dataset comes in three different sizes to accommodate various research
To use these variants in your training, simply replace 'imagewoof' in the dataset argument with 'imagewoof320' or 'imagewoof160'. For example:
!!! Example "Example"

    === "Python"

        ```python
        from ultralytics import YOLO

        # Load a model
        model = YOLO("yolov8n-cls.pt")  # load a pretrained model (recommended for training)

        # For medium-sized dataset
        model.train(data="imagewoof320", epochs=100, imgsz=224)

        # For small-sized dataset
        model.train(data="imagewoof160", epochs=100, imgsz=224)
        ```

    === "CLI"

        ```bash
        # Load a pretrained model and train on the medium-sized dataset
        yolo classify train model=yolov8n-cls.pt data=imagewoof320 epochs=100 imgsz=224
        ```
It's important to note that using smaller images will likely yield lower performance in terms of classification accuracy. However, it's an excellent way to iterate quickly in the early stages of model development and prototyping.

@@ -203,7 +203,7 @@ The **Roboflow 100** dataset is accessible on [GitHub](https://github.com/robofl
When using the Roboflow 100 dataset in your research, ensure to properly cite it. Here is the recommended citation:
!!! Quote
!!! Quote ""
=== "BibTeX"

@@ -8,6 +8,17 @@ keywords: VisDrone, drone dataset, computer vision, object detection, object tra
The [VisDrone Dataset](https://github.com/VisDrone/VisDrone-Dataset) is a large-scale benchmark created by the AISKYEYE team at the Lab of Machine Learning and Data Mining, Tianjin University, China. It contains carefully annotated ground truth data for various computer vision tasks related to drone-based image and video analysis.
<p align="center">
<br>
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/28JV4rbzklM"
title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> How to Train Ultralytics YOLO Models on the VisDrone Dataset for Drone Image Analysis
</p>
VisDrone is composed of 288 video clips with 261,908 frames and 10,209 static images, captured by various drone-mounted cameras. The dataset covers a wide range of aspects, including location (14 different cities across China), environment (urban and rural), objects (pedestrians, vehicles, bicycles, etc.), and density (sparse and crowded scenes). The dataset was collected using various drone platforms under different scenarios and weather and lighting conditions. These frames are manually annotated with over 2.6 million bounding boxes of targets such as pedestrians, cars, bicycles, and tricycles. Attributes like scene visibility, object class, and occlusion are also provided for better data utilization.
## Dataset Structure
@@ -148,16 +159,19 @@ The configuration file for the VisDrone dataset, `VisDrone.yaml`, can be found i
If you use the VisDrone dataset in your research or development work, please cite the following paper:
!!! Quote "BibTeX"
```bibtex
@ARTICLE{9573394,
author={Zhu, Pengfei and Wen, Longyin and Du, Dawei and Bian, Xiao and Fan, Heng and Hu, Qinghua and Ling, Haibin},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
title={Detection and Tracking Meet Drones Challenge},
year={2021},
volume={},
number={},
pages={1-1},
doi={10.1109/TPAMI.2021.3119563}}
```
!!! Quote ""
=== "BibTeX"
```bibtex
@ARTICLE{9573394,
author={Zhu, Pengfei and Wen, Longyin and Du, Dawei and Bian, Xiao and Fan, Heng and Hu, Qinghua and Ling, Haibin},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
title={Detection and Tracking Meet Drones Challenge},
year={2021},
volume={},
number={},
pages={1-1},
doi={10.1109/TPAMI.2021.3119563}
}
```

@@ -13,7 +13,7 @@
" <a href=\"https://ultralytics.com/yolov8\" target=\"_blank\">\n",
" <img width=\"1024\", src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png\"></a>\n",
"\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [ि](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
"\n",
" <a href=\"https://console.paperspace.com/github/ultralytics/ultralytics\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"/></a>\n",
" <a href=\"https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
@@ -598,4 +598,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}

@@ -135,19 +135,23 @@ Ultralytics YOLO offers advanced real-time object detection, segmentation, and c
If you incorporate the Crack Segmentation Dataset into your research, please use the following BibTeX reference:
```bibtex
@misc{ crack-bphdr_dataset,
title = { crack Dataset },
type = { Open Source Dataset },
author = { University },
howpublished = { \url{ https://universe.roboflow.com/university-bswxt/crack-bphdr } },
url = { https://universe.roboflow.com/university-bswxt/crack-bphdr },
journal = { Roboflow Universe },
publisher = { Roboflow },
year = { 2022 },
month = { dec },
note = { visited on 2024-01-23 },
}
```
!!! Quote ""
=== "BibTeX"
```bibtex
@misc{ crack-bphdr_dataset,
title = { crack Dataset },
type = { Open Source Dataset },
author = { University },
howpublished = { \url{ https://universe.roboflow.com/university-bswxt/crack-bphdr } },
url = { https://universe.roboflow.com/university-bswxt/crack-bphdr },
journal = { Roboflow Universe },
publisher = { Roboflow },
year = { 2022 },
month = { dec },
note = { visited on 2024-01-23 },
}
```
This citation format ensures proper accreditation to the creators of the dataset and acknowledges its use in your research.

@@ -99,24 +99,28 @@ The [Roboflow Package Segmentation Dataset](https://universe.roboflow.com/factor
### How do I train an Ultralytics YOLOv8 model on the Package Segmentation Dataset?
You can train an Ultralytics YOLOv8n model using both Python and CLI methods. For Python, use the snippet below:
You can train an Ultralytics YOLOv8n model using both Python and CLI methods. Use the snippets below:
!!! Example "Train Example"

    === "Python"

        ```python
        from ultralytics import YOLO

        # Load a model
        model = YOLO("yolov8n-seg.pt")  # load a pretrained model

        # Train the model
        results = model.train(data="package-seg.yaml", epochs=100, imgsz=640)
        ```

    === "CLI"

        ```bash
        # Start training from a pretrained *.pt model
        yolo segment train data=package-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
        ```
Refer to the model [Training](../../modes/train.md) page for more details.

@@ -82,7 +82,7 @@ Without further ado, let's dive in!
```python
import pandas as pd
indx = [l.stem for l in labels] # uses base filename as ID (no extension)
indx = [label.stem for label in labels] # uses base filename as ID (no extension)
labels_df = pd.DataFrame([], columns=cls_idx, index=indx)
```
@@ -97,9 +97,9 @@ Without further ado, let's dive in!
with open(label, "r") as lf:
lines = lf.readlines()
for l in lines:
for line in lines:
# classes for YOLO label uses integer at first position of each line
lbl_counter[int(l.split(" ")[0])] += 1
lbl_counter[int(line.split(" ")[0])] += 1
labels_df.loc[label.stem] = lbl_counter
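Pieced together, the two fragments above form one label-counting loop. A self-contained sketch follows; the `dataset_path/labels` location and the `cls_idx` class-id list are placeholder assumptions for illustration:

```python
from collections import Counter
from pathlib import Path

import pandas as pd

labels = sorted(Path("dataset_path/labels").rglob("*.txt"))  # hypothetical label directory
cls_idx = list(range(80))  # placeholder class ids; derive from your dataset YAML in practice

indx = [label.stem for label in labels]  # uses base filename as ID (no extension)
labels_df = pd.DataFrame([], columns=cls_idx, index=indx)

for label in labels:
    lbl_counter = Counter()
    with open(label) as lf:
        for line in lf.readlines():
            # classes for YOLO label uses integer at first position of each line
            lbl_counter[int(line.split(" ")[0])] += 1
    labels_df.loc[label.stem] = lbl_counter

labels_df = labels_df.fillna(0.0)  # replace NaN with 0.0 for classes absent from a file
```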

@@ -10,6 +10,17 @@ keywords: parking management, YOLOv8, Ultralytics, vehicle detection, real-time
Parking management with [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics/) ensures efficient and safe parking by organizing spaces and monitoring availability. YOLOv8 can improve parking lot management through real-time vehicle detection and insights into parking occupancy.
<p align="center">
<br>
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/WwXnljc7ZUM"
title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
allowfullscreen>
</iframe>
<br>
<strong>Watch:</strong> How to Implement Parking Management Using Ultralytics YOLOv8 🚀
</p>
## Advantages of a Parking Management System
- **Efficiency**: Parking lot management optimizes the use of parking spaces and reduces congestion.

@@ -248,9 +248,9 @@ Learn more about the [benefits of sliced inference](#benefits-of-sliced-inferenc
Yes, you can visualize prediction results when using YOLOv8 with SAHI. Here's how you can export and visualize the results:
```python
from IPython.display import Image

result.export_visuals(export_dir="demo_data/")
Image("demo_data/prediction_visual.png")
```
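For context, the `result` object above comes from a sliced prediction. A minimal sketch of that step, assuming SAHI's `AutoDetectionModel` and `get_sliced_prediction` APIs and a hypothetical demo image path:

```python
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction

# Wrap a YOLOv8 checkpoint for sliced inference
detection_model = AutoDetectionModel.from_pretrained(
    model_type="yolov8", model_path="yolov8n.pt", confidence_threshold=0.3
)

# Slice the image into overlapping tiles and merge the per-tile detections
result = get_sliced_prediction(
    "demo_data/small-vehicles1.jpeg",  # hypothetical demo image
    detection_model,
    slice_height=256,
    slice_width=256,
    overlap_height_ratio=0.2,
    overlap_width_ratio=0.2,
)
```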

@@ -98,6 +98,18 @@ Next, [train a model](./models.md#train-model) on your dataset.
![Ultralytics HUB screenshot of the Dataset page with an arrow pointing to the Train Model button](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_upload_dataset_9.jpg)
## Download Dataset
Navigate to the Dataset page of the dataset you want to download, open the dataset actions dropdown and click on the **Download** option. This action will start downloading your dataset.
![Ultralytics HUB screenshot of the Dataset page with an arrow pointing to the Download option](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_download_dataset_1.jpg)
??? tip "Tip"
You can download a dataset directly from the [Datasets](https://hub.ultralytics.com/datasets) page.
![Ultralytics HUB screenshot of the Datasets page with an arrow pointing to the Download option of one of the datasets](https://raw.githubusercontent.com/ultralytics/assets/main/docs/hub/datasets/hub_download_dataset_2.jpg)
## Share Dataset
!!! info "Info"

@@ -18,7 +18,6 @@ keywords: Ultralytics HUB, YOLO models, train YOLO, YOLOv5, YOLOv8, object detec
<a href="https://docs.ultralytics.com/pt/hub/">Português</a> |
<a href="https://docs.ultralytics.com/tr/hub/">Türkçe</a> |
<a href="https://docs.ultralytics.com/vi/hub/">Tiếng Việt</a> |
<a href="https://docs.ultralytics.com/hi/hub/">ि</a> |
<a href="https://docs.ultralytics.com/ar/hub/">العربية</a>
<br>
<br>

@@ -16,7 +16,6 @@ keywords: Ultralytics, YOLOv8, object detection, image segmentation, deep learni
<a href="https://docs.ultralytics.com/pt/">Português</a> |
<a href="https://docs.ultralytics.com/tr/">Türkçe</a> |
<a href="https://docs.ultralytics.com/vi/">Tiếng Việt</a> |
<a href="https://docs.ultralytics.com/hi/">ि</a> |
<a href="https://docs.ultralytics.com/ar/">العربية</a>
<br>
<br>

@@ -114,7 +114,7 @@ After installing Kaggle, we can load the dataset into Watsonx.
os.environ["KAGGLE_KEY"] = "apiKey"
# Load dataset
!kaggle datasets download atiqishrak/trash-dataset-icra19 --unzip
os.system("kaggle datasets download atiqishrak/trash-dataset-icra19 --unzip")
# Store working directory path as work_dir
work_dir = os.getcwd()

@@ -117,7 +117,7 @@ To train a YOLOv8 model using JupyterLab:
1. Install JupyterLab and the Ultralytics package:
```python
```bash
pip install jupyterlab ultralytics
```
@@ -138,7 +138,8 @@ To train a YOLOv8 model using JupyterLab:
```
5. Visualize training results using JupyterLab's built-in plotting capabilities:
```python
```ipython
%matplotlib inline
from ultralytics.utils.plotting import plot_results
plot_results(results)

@@ -61,125 +61,98 @@ Before diving into the usage instructions, be sure to check out the range of [YO
=== "Python"

    ```python
    from ultralytics import YOLO

    # Load a pre-trained model
    model = YOLO("yolov8n.pt")

    # Train the model
    results = model.train(data="coco8.yaml", epochs=100, imgsz=640)
    ```

Upon running the usage code snippet above, you can expect the following output:

```bash
TensorBoard: Start with 'tensorboard --logdir path_to_your_tensorboard_logs', view at http://localhost:6006/
```

This output indicates that TensorBoard is now actively monitoring your YOLOv8 training session. You can access the TensorBoard dashboard by visiting the provided URL (http://localhost:6006/) to view real-time training metrics and model performance. For users working in Google Colab, the TensorBoard will be displayed in the same cell where you executed the TensorBoard configuration commands.

For more information related to the model training process, be sure to check our [YOLOv8 Model Training guide](../modes/train.md). If you are interested in learning more about logging, checkpoints, plotting, and file management, read our [usage guide on configuration](../usage/cfg.md).

## Understanding Your TensorBoard for YOLOv8 Training

Now, let's focus on understanding the various features and components of TensorBoard in the context of YOLOv8 training. The three key sections of the TensorBoard are Time Series, Scalars, and Graphs.

### Time Series

The Time Series feature in the TensorBoard offers a dynamic and detailed perspective of various training metrics over time for YOLOv8 models. It focuses on the progression and trends of metrics across training epochs. Here's an example of what you can expect to see.

![image](https://github.com/ultralytics/ultralytics/assets/25847604/20b3e038-0356-465e-a37e-1ea232c68354)

#### Key Features of Time Series in TensorBoard

- **Filter Tags and Pinned Cards**: This functionality allows users to filter specific metrics and pin cards for quick comparison and access. It's particularly useful for focusing on specific aspects of the training process.

- **Detailed Metric Cards**: Time Series divides metrics into different categories like learning rate (lr), training (train), and validation (val) metrics, each represented by individual cards.

- **Graphical Display**: Each card in the Time Series section shows a detailed graph of a specific metric over the course of training. This visual representation aids in identifying trends, patterns, or anomalies in the training process.

- **In-Depth Analysis**: Time Series provides an in-depth analysis of each metric. For instance, different learning rate segments are shown, offering insights into how adjustments in learning rate impact the model's learning curve.

#### Importance of Time Series in YOLOv8 Training

The Time Series section is essential for a thorough analysis of the YOLOv8 model's training progress. It lets you track the metrics in real time to promptly identify and solve issues. It also offers a detailed view of each metric's progression, which is crucial for fine-tuning the model and enhancing its performance.

### Scalars

Scalars in the TensorBoard are crucial for plotting and analyzing simple metrics like loss and accuracy during the training of YOLOv8 models. They offer a clear and concise view of how these metrics evolve with each training epoch, providing insights into the model's learning effectiveness and stability. Here's an example of what you can expect to see.

![image](https://github.com/ultralytics/ultralytics/assets/25847604/f9228193-13e9-4768-9edf-8fa15ecd24fa)

#### Key Features of Scalars in TensorBoard

- **Learning Rate (lr) Tags**: These tags show the variations in the learning rate across different segments (e.g., `pg0`, `pg1`, `pg2`). This helps us understand the impact of learning rate adjustments on the training process.

- **Metrics Tags**: Scalars include performance indicators such as:

    - `mAP50 (B)`: Mean Average Precision at 50% Intersection over Union (IoU), crucial for assessing object detection accuracy.

    - `mAP50-95 (B)`: Mean Average Precision calculated over a range of IoU thresholds, offering a more comprehensive evaluation of accuracy.

    - `Precision (B)`: Indicates the ratio of correctly predicted positive observations, key to understanding prediction accuracy.

    - `Recall (B)`: Important for models where missing a detection is significant, this metric measures the ability to detect all relevant instances.

    - To learn more about the different metrics, read our guide on [performance metrics](../guides/yolo-performance-metrics.md).

- **Training and Validation Tags (`train`, `val`)**: These tags display metrics specifically for the training and validation datasets, allowing for a comparative analysis of model performance across different data sets.

#### Importance of Monitoring Scalars

Observing scalar metrics is crucial for fine-tuning the YOLOv8 model. Variations in these metrics, such as spikes or irregular patterns in loss graphs, can highlight potential issues such as overfitting, underfitting, or inappropriate learning rate settings. By closely monitoring these scalars, you can make informed decisions to optimize the training process, ensuring that the model learns effectively and achieves the desired performance.

### Difference Between Scalars and Time Series

While both Scalars and Time Series in TensorBoard are used for tracking metrics, they serve slightly different purposes. Scalars focus on plotting simple metrics such as loss and accuracy as scalar values, providing a high-level overview of how these metrics change with each training epoch. The Time Series section, by contrast, offers a more detailed timeline view of various metrics and is particularly useful for monitoring their progression and trends over time, providing a deeper dive into the specifics of the training process.

### Graphs

The Graphs section of the TensorBoard visualizes the computational graph of the YOLOv8 model, showing how operations and data flow within the model. It's a powerful tool for understanding the model's structure, ensuring that all layers are connected correctly, and for identifying any potential bottlenecks in data flow. Here's an example of what you can expect to see.

![image](https://github.com/ultralytics/ultralytics/assets/25847604/039028e0-4ab3-4170-bfa8-f93ce483f615)

Graphs are particularly useful for debugging the model, especially in complex architectures typical in deep learning models like YOLOv8. They help in verifying layer connections and the overall design of the model.

## Summary

This guide aims to help you use TensorBoard with YOLOv8 for visualization and analysis of machine learning model training. It focuses on explaining how key TensorBoard features can provide insights into training metrics and model performance during YOLOv8 training sessions.

For a more detailed exploration of these features and effective utilization strategies, you can refer to TensorFlow's official [TensorBoard documentation](https://www.tensorflow.org/tensorboard/get_started) and their [GitHub repository](https://github.com/tensorflow/tensorboard).

Want to learn more about the various integrations of Ultralytics? Check out the [Ultralytics integrations guide page](../integrations/index.md) to see what other exciting capabilities are waiting to be discovered!

## FAQ

### How do I integrate YOLOv8 with TensorBoard for real-time visualization?

Integrating YOLOv8 with TensorBoard allows for real-time visual insights during model training. First, install the necessary package:

!!! Example "Installation"

    === "CLI"

        ```bash
        # Install the required package for YOLOv8 and Tensorboard
        pip install ultralytics
        ```

Next, configure TensorBoard to log your training runs, then start TensorBoard:

!!! Example "Configure TensorBoard for Google Colab"

    === "Python"

        ```ipython
        %load_ext tensorboard
        %tensorboard --logdir path/to/runs
        ```

Finally, during training, YOLOv8 automatically logs metrics like loss and accuracy to TensorBoard. You can monitor these metrics by visiting [http://localhost:6006/](http://localhost:6006/).

For a comprehensive guide, refer to our [YOLOv8 Model Training guide](../modes/train.md).

### What benefits does using TensorBoard with YOLOv8 offer?
@@ -225,16 +198,16 @@ Yes, you can use TensorBoard in a Google Colab environment to train YOLOv8 model
%tensorboard --logdir path/to/runs
```
Then, run the YOLOv8 training script:

```python
from ultralytics import YOLO

# Load a pre-trained model
model = YOLO("yolov8n.pt")

# Train the model
results = model.train(data="coco8.yaml", epochs=100, imgsz=640)
```
TensorBoard will visualize the training progress within Colab, providing real-time insights into metrics like loss and accuracy. For additional details on configuring YOLOv8 training, see our detailed [YOLOv8 Installation guide](../quickstart.md).

@ -4,11 +4,9 @@ Sitemap: https://docs.ultralytics.com/ar/sitemap.xml
Sitemap: https://docs.ultralytics.com/de/sitemap.xml
Sitemap: https://docs.ultralytics.com/es/sitemap.xml
Sitemap: https://docs.ultralytics.com/fr/sitemap.xml
Sitemap: https://docs.ultralytics.com/hi/sitemap.xml
Sitemap: https://docs.ultralytics.com/it/sitemap.xml
Sitemap: https://docs.ultralytics.com/ja/sitemap.xml
Sitemap: https://docs.ultralytics.com/ko/sitemap.xml
Sitemap: https://docs.ultralytics.com/nl/sitemap.xml
Sitemap: https://docs.ultralytics.com/pt/sitemap.xml
Sitemap: https://docs.ultralytics.com/ru/sitemap.xml
Sitemap: https://docs.ultralytics.com/tr/sitemap.xml

@ -203,7 +203,6 @@ class HuggingFaceVideoClassifier:
Returns:
torch.Tensor: The model's output.
"""
input_ids = self.processor(text=self.labels, return_tensors="pt", padding=True)["input_ids"].to(self.device)
inputs = {"pixel_values": sequences, "input_ids": input_ids}

@ -24,7 +24,7 @@ int main(int argc, char **argv)
//
// Note that in this example the classes are hard-coded and 'classes.txt' is a place holder.
Inference inf(projectBasePath + "/yolov8s.onnx", cv::Size(640, 480), "classes.txt", runOnGPU);
Inference inf(projectBasePath + "/yolov8s.onnx", cv::Size(640, 640), "classes.txt", runOnGPU);
std::vector<std::string> imageNames;
imageNames.push_back(projectBasePath + "/ultralytics/assets/bus.jpg");

@ -49,6 +49,9 @@ elseif (APPLE)
# set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-osx-x64-${ONNXRUNTIME_VERSION}")
# Apple Universal binary
# set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-osx-universal2-${ONNXRUNTIME_VERSION}")
else ()
message(SEND_ERROR "Variable ONNXRUNTIME_ROOT is not set properly. Please check if your cmake project \
is not compiled with `-D WIN32=TRUE`, `-D LINUX=TRUE`, or `-D APPLE=TRUE`!")
endif ()
include_directories(${PROJECT_NAME} ${ONNXRUNTIME_ROOT}/include)

@ -82,6 +82,19 @@ Note (2): Due to ONNX Runtime, we need to use CUDA 11 and cuDNN 8. Keep in mind
cmake ..
```
**Notice**:
If you encounter an error indicating that the `ONNXRUNTIME_ROOT` variable is not set correctly, you can resolve this by building the project using the appropriate command tailored to your system.
```console
# compiled in a win32 system
cmake -D WIN32=TRUE ..
# compiled in a linux system
cmake -D LINUX=TRUE ..
# compiled in an apple system
cmake -D APPLE=TRUE ..
```
5. Build the project:
```console

@ -48,7 +48,6 @@ class YOLOv8:
Returns:
None
"""
# Extract the coordinates of the bounding box
x1, y1, w, h = box
@ -118,7 +117,6 @@ class YOLOv8:
Returns:
numpy.ndarray: The input image with detections drawn on it.
"""
# Transpose and squeeze the output to match the expected shape
outputs = np.transpose(np.squeeze(output[0]))

@ -30,7 +30,6 @@ class LetterBox:
def __call__(self, labels=None, image=None):
"""Return updated labels and image with added border."""
if labels is None:
labels = {}
img = labels.get("img") if image is None else image
@ -79,7 +78,6 @@ class LetterBox:
def _update_labels(self, labels, ratio, padw, padh):
"""Update labels."""
labels["instances"].convert_bbox(format="xyxy")
labels["instances"].denormalize(*labels["img"].shape[:2][::-1])
labels["instances"].scale(*ratio)
@ -100,7 +98,6 @@ class Yolov8TFLite:
confidence_thres: Confidence threshold for filtering detections.
iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression.
"""
self.tflite_model = tflite_model
self.input_image = input_image
self.confidence_thres = confidence_thres
@ -125,7 +122,6 @@ class Yolov8TFLite:
Returns:
None
"""
# Extract the coordinates of the bounding box
x1, y1, w, h = box
@ -164,7 +160,6 @@ class Yolov8TFLite:
Returns:
image_data: Preprocessed image data ready for inference.
"""
# Read the input image using OpenCV
self.img = cv2.imread(self.input_image)
@ -193,7 +188,6 @@ class Yolov8TFLite:
Returns:
numpy.ndarray: The input image with detections drawn on it.
"""
boxes = []
scores = []
class_ids = []
@ -238,7 +232,6 @@ class Yolov8TFLite:
Returns:
output_img: The output image with drawn detections.
"""
# Create an interpreter for the TFLite model
interpreter = tflite.Interpreter(model_path=self.tflite_model)
self.model = interpreter

@ -40,7 +40,7 @@ def mouse_callback(event, x, y, flags, param):
"""
Handles mouse events for region manipulation.
Parameters:
Args:
event (int): The mouse event type (e.g., cv2.EVENT_LBUTTONDOWN).
x (int): The x-coordinate of the mouse pointer.
y (int): The y-coordinate of the mouse pointer.

@ -21,7 +21,6 @@ class YOLOv8Seg:
Args:
onnx_model (str): Path to the ONNX model.
"""
# Build Ort session
self.session = ort.InferenceSession(
onnx_model,
@ -57,7 +56,6 @@ class YOLOv8Seg:
segments (List): list of segments.
masks (np.ndarray): [N, H, W], output masks.
"""
# Pre-process
im, ratio, (pad_w, pad_h) = self.preprocess(im0)
@ -90,7 +88,6 @@ class YOLOv8Seg:
pad_w (float): width padding in letterbox.
pad_h (float): height padding in letterbox.
"""
# Resize and pad input image using letterbox() (Borrowed from Ultralytics)
shape = img.shape[:2] # original image shape
new_shape = (self.model_height, self.model_width)
@ -130,7 +127,7 @@ class YOLOv8Seg:
"""
x, protos = preds[0], preds[1] # Two outputs: predictions and protos
# Transpose the first output: (Batch_size, xywh_conf_cls_nm, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls_nm)
# Transpose dim 1: (Batch_size, xywh_conf_cls_nm, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls_nm)
x = np.einsum("bcn->bnc", x)
# Predictions filtering by conf-threshold
@ -169,8 +166,8 @@ class YOLOv8Seg:
@staticmethod
def masks2segments(masks):
"""
It takes a list of masks(n,h,w) and returns a list of segments(n,xy) (Borrowed from
https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L750)
Takes a list of masks(n,h,w) and returns a list of segments(n,xy), from
https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py.
Args:
masks (numpy.ndarray): the output of the model, which is a tensor of shape (batch_size, 160, 160).
@ -191,8 +188,8 @@ class YOLOv8Seg:
@staticmethod
def crop_mask(masks, boxes):
"""
It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box. (Borrowed from
https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L599)
Takes a mask and a bounding box, and returns a mask that is cropped to the bounding box, from
https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py.
Args:
masks (Numpy.ndarray): [n, h, w] tensor of masks.
@ -209,8 +206,8 @@ class YOLOv8Seg:
def process_mask(self, protos, masks_in, bboxes, im0_shape):
"""
Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality
but is slower. (Borrowed from https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L618)
Takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher
quality but is slower, from https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py.
Args:
protos (numpy.ndarray): [mask_dim, mask_h, mask_w].
@ -232,8 +229,8 @@ class YOLOv8Seg:
@staticmethod
def scale_mask(masks, im0_shape, ratio_pad=None):
"""
Takes a mask, and resizes it to the original image size. (Borrowed from
https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L305)
Takes a mask, and resizes it to the original image size, from
https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py.
Args:
masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
@ -277,7 +274,6 @@ class YOLOv8Seg:
Returns:
None
"""
# Draw rectangles and polygons
im_canvas = im.copy()
for (*box, conf, cls_), segment in zip(bboxes, segments):

@ -11,7 +11,7 @@
" <a href=\"https://ultralytics.com/yolov8\" target=\"_blank\">\n",
" <img width=\"1024\", src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png\"></a>\n",
"\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [ि](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
"\n",
" <a href=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml\"><img src=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml/badge.svg\" alt=\"Ultralytics CI\"></a>\n",
" <a href=\"https://console.paperspace.com/github/ultralytics/ultralytics\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"/></a>\n",

@ -11,7 +11,7 @@
"\n",
"<div align=\"center\">\n",
"\n",
"[中文](https://docs.ultralytics.com/zh/hub/) | [한국어](https://docs.ultralytics.com/ko/hub/) | [日本語](https://docs.ultralytics.com/ja/hub/) | [Русский](https://docs.ultralytics.com/ru/hub/) | [Deutsch](https://docs.ultralytics.com/de/hub/) | [Français](https://docs.ultralytics.com/fr/hub/) | [Español](https://docs.ultralytics.com/es/hub/) | [Português](https://docs.ultralytics.com/pt/hub/) | [Türkçe](https://docs.ultralytics.com/tr/hub/) | [Tiếng Việt](https://docs.ultralytics.com/vi/hub/) | [ि](https://docs.ultralytics.com/hi/hub/) | [العربية](https://docs.ultralytics.com/ar/hub/)\n",
"[中文](https://docs.ultralytics.com/zh/hub/) | [한국어](https://docs.ultralytics.com/ko/hub/) | [日本語](https://docs.ultralytics.com/ja/hub/) | [Русский](https://docs.ultralytics.com/ru/hub/) | [Deutsch](https://docs.ultralytics.com/de/hub/) | [Français](https://docs.ultralytics.com/fr/hub/) | [Español](https://docs.ultralytics.com/es/hub/) | [Português](https://docs.ultralytics.com/pt/hub/) | [Türkçe](https://docs.ultralytics.com/tr/hub/) | [Tiếng Việt](https://docs.ultralytics.com/vi/hub/) | [العربية](https://docs.ultralytics.com/ar/hub/)\n",
"\n",
" <a href=\"https://github.com/ultralytics/hub/actions/workflows/ci.yaml\">\n",
" <img src=\"https://github.com/ultralytics/hub/actions/workflows/ci.yaml/badge.svg\" alt=\"CI CPU\"></a>\n",

@ -11,7 +11,7 @@
" <a href=\"https://ultralytics.com/yolov8\" target=\"_blank\">\n",
" <img width=\"1024\", src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png\"></a>\n",
"\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [ि](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
"\n",
" <a href=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml\"><img src=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml/badge.svg\" alt=\"Ultralytics CI\"></a>\n",
" <a href=\"https://console.paperspace.com/github/ultralytics/ultralytics\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"/></a>\n",

@ -11,7 +11,7 @@
" <a href=\"https://ultralytics.com/yolov8\" target=\"_blank\">\n",
" <img width=\"1024\", src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png\"></a>\n",
"\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [ि](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
"\n",
" <a href=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml\"><img src=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml/badge.svg\" alt=\"Ultralytics CI\"></a>\n",
" <a href=\"https://console.paperspace.com/github/ultralytics/ultralytics\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"/></a>\n",

@ -25,7 +25,7 @@
" <a href=\"https://ultralytics.com/yolov8\" target=\"_blank\">\n",
" <img width=\"1024\", src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png\"></a>\n",
"\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [ि](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [Türkçe](https://docs.ultralytics.com/tr/) | [Tiếng Việt](https://docs.ultralytics.com/vi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
"\n",
" <a href=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml\"><img src=\"https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml/badge.svg\" alt=\"Ultralytics CI\"></a>\n",
" <a href=\"https://console.paperspace.com/github/ultralytics/ultralytics\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"/></a>\n",

@ -174,10 +174,8 @@ nav:
- 🇪🇸&nbsp Español: https://docs.ultralytics.com/es/
- 🇵🇹&nbsp Português: https://docs.ultralytics.com/pt/
- 🇮🇹&nbsp Italiano: https://docs.ultralytics.com/it/
- 🇳🇱&nbsp Nederlands: https://docs.ultralytics.com/nl/
- 🇹🇷&nbsp Türkçe: https://docs.ultralytics.com/tr/
- 🇻🇳&nbsp Tiếng Việt: https://docs.ultralytics.com/vi/
- 🇮🇳&nbsp हिन्दी: https://docs.ultralytics.com/hi/
- 🇸🇦&nbsp العربية: https://docs.ultralytics.com/ar/
- Quickstart:
- quickstart.md
@ -649,6 +647,8 @@ plugins:
- mkdocs-jupyter
- redirects:
redirect_maps:
hi/index.md: index.md
nl/index.md: index.md
callbacks.md: usage/callbacks.md
cfg.md: usage/cfg.md
cli.md: usage/cli.md

@ -172,12 +172,15 @@ split_before_first_argument = false
[tool.ruff]
line-length = 120
[tool.ruff.format]
docstring-code-format = true
[tool.docformatter]
wrap-summaries = 120
wrap-descriptions = 120
in-place = true
pre-summary-newline = true
close-quotes-on-newline = true
in-place = true
[tool.codespell]
ignore-words-list = "crate,nd,ned,strack,dota,ane,segway,fo,gool,winn,commend,bloc,nam,afterall"

@ -13,7 +13,6 @@ WORKOUTS_SOLUTION_DEMO = "https://github.com/ultralytics/assets/releases/downloa
@pytest.mark.slow
def test_major_solutions():
"""Test the object counting, heatmap, speed estimation and queue management solution."""
safe_download(url=MAJOR_SOLUTIONS_DEMO)
model = YOLO("yolov8n.pt")
names = model.names
@ -41,7 +40,6 @@ def test_major_solutions():
@pytest.mark.slow
def test_aigym():
"""Test the workouts monitoring solution."""
safe_download(url=WORKOUTS_SOLUTION_DEMO)
model = YOLO("yolov8n-pose.pt")
cap = cv2.VideoCapture("solution_ci_pose_demo.mp4")
@ -60,7 +58,6 @@ def test_aigym():
@pytest.mark.slow
def test_instance_segmentation():
"""Test the instance segmentation solution."""
from ultralytics.utils.plotting import Annotator, colors
model = YOLO("yolov8n-seg.pt")
@ -86,5 +83,4 @@ def test_instance_segmentation():
@pytest.mark.slow
def test_streamlit_predict():
"""Test streamlit predict live inference solution."""
solutions.inference()

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
__version__ = "8.2.78"
__version__ = "8.2.81"
import os

@ -198,15 +198,15 @@ def cfg2dict(cfg):
Examples:
Convert a YAML file path to a dictionary:
>>> config_dict = cfg2dict('config.yaml')
>>> config_dict = cfg2dict("config.yaml")
Convert a SimpleNamespace to a dictionary:
>>> from types import SimpleNamespace
>>> config_sn = SimpleNamespace(param1='value1', param2='value2')
>>> config_sn = SimpleNamespace(param1="value1", param2="value2")
>>> config_dict = cfg2dict(config_sn)
Pass through an already existing dictionary:
>>> config_dict = cfg2dict({'param1': 'value1', 'param2': 'value2'})
>>> config_dict = cfg2dict({"param1": "value1", "param2": "value2"})
Notes:
- If cfg is a path or string, it's loaded as YAML and converted to a dictionary.
@ -235,7 +235,7 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove
Examples:
>>> from ultralytics.cfg import get_cfg
>>> config = get_cfg() # Load default configuration
>>> config = get_cfg('path/to/config.yaml', overrides={'epochs': 50, 'batch_size': 16})
>>> config = get_cfg("path/to/config.yaml", overrides={"epochs": 50, "batch_size": 16})
Notes:
- If both `cfg` and `overrides` are provided, the values in `overrides` will take precedence.
@ -282,10 +282,10 @@ def check_cfg(cfg, hard=True):
Examples:
>>> config = {
... 'epochs': 50, # valid integer
... 'lr0': 0.01, # valid float
... 'momentum': 1.2, # invalid float (out of 0.0-1.0 range)
... 'save': 'true', # invalid bool
... "epochs": 50, # valid integer
... "lr0": 0.01, # valid float
... "momentum": 1.2, # invalid float (out of 0.0-1.0 range)
... "save": "true", # invalid bool
... }
>>> check_cfg(config, hard=False)
>>> print(config)
@ -345,12 +345,11 @@ def get_save_dir(args, name=None):
Examples:
>>> from types import SimpleNamespace
>>> args = SimpleNamespace(project='my_project', task='detect', mode='train', exist_ok=True)
>>> args = SimpleNamespace(project="my_project", task="detect", mode="train", exist_ok=True)
>>> save_dir = get_save_dir(args)
>>> print(save_dir)
my_project/detect/train
"""
if getattr(args, "save_dir", None):
save_dir = args.save_dir
else:
@ -381,7 +380,6 @@ def _handle_deprecation(custom):
equivalents. It also handles value conversions where necessary, such as inverting boolean values for
'hide_labels' and 'hide_conf'.
"""
for key in custom.copy().keys():
if key == "boxes":
deprecation_warn(key, "show_boxes")
@ -413,8 +411,8 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None):
SystemExit: If mismatched keys are found between the custom and base dictionaries.
Examples:
>>> base_cfg = {'epochs': 50, 'lr0': 0.01, 'batch_size': 16}
>>> custom_cfg = {'epoch': 100, 'lr': 0.02, 'batch_size': 32}
>>> base_cfg = {"epochs": 50, "lr0": 0.01, "batch_size": 16}
>>> custom_cfg = {"epoch": 100, "lr": 0.02, "batch_size": 32}
>>> try:
... check_dict_alignment(base_cfg, custom_cfg)
... except SystemExit:
@ -548,9 +546,9 @@ def handle_yolo_settings(args: List[str]) -> None:
def handle_explorer(args: List[str]):
"""
This function launches a graphical user interface that provides tools for interacting with and analyzing datasets
using the Ultralytics Explorer API. It checks for the required 'streamlit' package and informs the user that the
Explorer dashboard is loading.
Launches a graphical user interface that provides tools for interacting with and analyzing datasets using the
Ultralytics Explorer API. It checks for the required 'streamlit' package and informs the user that the Explorer
dashboard is loading.
Args:
args (List[str]): A list of optional command line arguments.

@ -21,7 +21,7 @@ def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="",
Examples:
>>> from ultralytics.data.annotator import auto_annotate
>>> auto_annotate(data='ultralytics/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt')
>>> auto_annotate(data="ultralytics/assets", det_model="yolov8n.pt", sam_model="mobile_sam.pt")
Notes:
- The function creates a new directory for output if not specified.

@ -38,7 +38,7 @@ class BaseTransform:
Examples:
>>> transform = BaseTransform()
>>> labels = {'image': np.array(...), 'instances': [...], 'semantic': np.array(...)}
>>> labels = {"image": np.array(...), "instances": [...], "semantic": np.array(...)}
>>> transformed_labels = transform(labels)
"""
@ -93,7 +93,7 @@ class BaseTransform:
Examples:
>>> transform = BaseTransform()
>>> labels = {'instances': Instances(xyxy=torch.rand(5, 4), cls=torch.randint(0, 80, (5,)))}
>>> labels = {"instances": Instances(xyxy=torch.rand(5, 4), cls=torch.randint(0, 80, (5,)))}
>>> transformed_labels = transform.apply_instances(labels)
"""
pass
@ -135,7 +135,7 @@ class BaseTransform:
Examples:
>>> transform = BaseTransform()
>>> labels = {'img': np.random.rand(640, 640, 3), 'instances': []}
>>> labels = {"img": np.random.rand(640, 640, 3), "instances": []}
>>> transformed_labels = transform(labels)
"""
self.apply_image(labels)
@ -338,6 +338,7 @@ class BaseMixTransform:
... def _mix_transform(self, labels):
... # Implement custom mix logic here
... return labels
...
... def get_indexes(self):
... return [random.randint(0, len(self.dataset) - 1) for _ in range(3)]
>>> dataset = YourDataset()
@ -421,7 +422,7 @@ class BaseMixTransform:
Examples:
>>> transform = BaseMixTransform(dataset)
>>> labels = {'image': img, 'bboxes': boxes, 'mix_labels': [{'image': img2, 'bboxes': boxes2}]}
>>> labels = {"image": img, "bboxes": boxes, "mix_labels": [{"image": img2, "bboxes": boxes2}]}
>>> augmented_labels = transform._mix_transform(labels)
"""
raise NotImplementedError
@ -456,20 +457,17 @@ class BaseMixTransform:
Examples:
>>> labels = {
... 'texts': [['cat'], ['dog']],
... 'cls': torch.tensor([[0], [1]]),
... 'mix_labels': [{
... 'texts': [['bird'], ['fish']],
... 'cls': torch.tensor([[0], [1]])
... }]
... "texts": [["cat"], ["dog"]],
... "cls": torch.tensor([[0], [1]]),
... "mix_labels": [{"texts": [["bird"], ["fish"]], "cls": torch.tensor([[0], [1]])}],
... }
>>> updated_labels = self._update_label_text(labels)
>>> print(updated_labels['texts'])
>>> print(updated_labels["texts"])
[['cat'], ['dog'], ['bird'], ['fish']]
>>> print(updated_labels['cls'])
>>> print(updated_labels["cls"])
tensor([[0],
[1]])
>>> print(updated_labels['mix_labels'][0]['cls'])
>>> print(updated_labels["mix_labels"][0]["cls"])
tensor([[2],
[3]])
"""
@ -616,9 +614,12 @@ class Mosaic(BaseMixTransform):
Examples:
>>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=3)
>>> labels = {'img': np.random.rand(480, 640, 3), 'mix_labels': [{'img': np.random.rand(480, 640, 3)} for _ in range(2)]}
>>> labels = {
... "img": np.random.rand(480, 640, 3),
... "mix_labels": [{"img": np.random.rand(480, 640, 3)} for _ in range(2)],
... }
>>> result = mosaic._mosaic3(labels)
>>> print(result['img'].shape)
>>> print(result["img"].shape)
(640, 640, 3)
"""
mosaic_labels = []
@ -670,9 +671,10 @@ class Mosaic(BaseMixTransform):
Examples:
>>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=4)
>>> labels = {"img": np.random.rand(480, 640, 3), "mix_labels": [
... {"img": np.random.rand(480, 640, 3)} for _ in range(3)
... ]}
>>> labels = {
... "img": np.random.rand(480, 640, 3),
... "mix_labels": [{"img": np.random.rand(480, 640, 3)} for _ in range(3)],
... }
>>> result = mosaic._mosaic4(labels)
>>> assert result["img"].shape == (1280, 1280, 3)
"""
@ -734,7 +736,7 @@ class Mosaic(BaseMixTransform):
>>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=9)
>>> input_labels = dataset[0]
>>> mosaic_result = mosaic._mosaic9(input_labels)
>>> mosaic_image = mosaic_result['img']
>>> mosaic_image = mosaic_result["img"]
"""
mosaic_labels = []
s = self.imgsz
@ -898,7 +900,7 @@ class MixUp(BaseMixTransform):
Examples:
>>> from ultralytics.data.dataset import YOLODataset
>>> dataset = YOLODataset('path/to/data.yaml')
>>> dataset = YOLODataset("path/to/data.yaml")
>>> mixup = MixUp(dataset, pre_transform=None, p=0.5)
"""
super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
@ -974,10 +976,10 @@ class RandomPerspective:
Examples:
>>> transform = RandomPerspective(degrees=10, translate=0.1, scale=0.1, shear=10)
>>> image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
>>> labels = {'img': image, 'cls': np.array([0, 1]), 'instances': Instances(...)}
>>> labels = {"img": image, "cls": np.array([0, 1]), "instances": Instances(...)}
>>> result = transform(labels)
>>> transformed_image = result['img']
>>> transformed_instances = result['instances']
>>> transformed_image = result["img"]
>>> transformed_instances = result["instances"]
"""
def __init__(
@ -1003,7 +1005,6 @@ class RandomPerspective:
>>> transform = RandomPerspective(degrees=10.0, translate=0.1, scale=0.5, shear=5.0)
>>> result = transform(labels) # Apply random perspective to labels
"""
self.degrees = degrees
self.translate = translate
self.scale = scale
@ -1036,7 +1037,6 @@ class RandomPerspective:
>>> border = (10, 10)
>>> transformed_img, matrix, scale = affine_transform(img, border)
"""
# Center
C = np.eye(3, dtype=np.float32)
@ -1209,12 +1209,12 @@ class RandomPerspective:
>>> transform = RandomPerspective()
>>> image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
>>> labels = {
... 'img': image,
... 'cls': np.array([0, 1, 2]),
... 'instances': Instances(bboxes=np.array([[10, 10, 50, 50], [100, 100, 150, 150]]))
... "img": image,
... "cls": np.array([0, 1, 2]),
... "instances": Instances(bboxes=np.array([[10, 10, 50, 50], [100, 100, 150, 150]])),
... }
>>> result = transform(labels)
>>> assert result['img'].shape[:2] == result['resized_shape']
>>> assert result["img"].shape[:2] == result["resized_shape"]
"""
if self.pre_transform and "mosaic_border" not in labels:
labels = self.pre_transform(labels)
@ -1358,9 +1358,9 @@ class RandomHSV:
Examples:
>>> hsv_augmenter = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
>>> labels = {'img': np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)}
>>> labels = {"img": np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)}
>>> hsv_augmenter(labels)
>>> augmented_img = labels['img']
>>> augmented_img = labels["img"]
"""
img = labels["img"]
if self.hgain or self.sgain or self.vgain:
@ -1394,7 +1394,7 @@ class RandomFlip:
__call__: Applies the random flip transformation to an image and its annotations.
Examples:
>>> transform = RandomFlip(p=0.5, direction='horizontal')
>>> transform = RandomFlip(p=0.5, direction="horizontal")
>>> result = transform({"img": image, "instances": instances})
>>> flipped_image = result["img"]
>>> flipped_instances = result["instances"]
@ -1416,8 +1416,8 @@ class RandomFlip:
AssertionError: If direction is not 'horizontal' or 'vertical', or if p is not between 0 and 1.
Examples:
>>> flip = RandomFlip(p=0.5, direction='horizontal')
>>> flip = RandomFlip(p=0.7, direction='vertical', flip_idx=[1, 0, 3, 2, 5, 4])
>>> flip = RandomFlip(p=0.5, direction="horizontal")
>>> flip = RandomFlip(p=0.7, direction="vertical", flip_idx=[1, 0, 3, 2, 5, 4])
"""
assert direction in {"horizontal", "vertical"}, f"Support direction `horizontal` or `vertical`, got {direction}"
assert 0 <= p <= 1.0, f"The probability should be in range [0, 1], but got {p}."
@ -1446,8 +1446,8 @@ class RandomFlip:
'instances' (ultralytics.utils.instance.Instances): Updated instances matching the flipped image.
Examples:
>>> labels = {'img': np.random.rand(640, 640, 3), 'instances': Instances(...)}
>>> random_flip = RandomFlip(p=0.5, direction='horizontal')
>>> labels = {"img": np.random.rand(640, 640, 3), "instances": Instances(...)}
>>> random_flip = RandomFlip(p=0.5, direction="horizontal")
>>> flipped_labels = random_flip(labels)
"""
img = labels["img"]
@ -1493,8 +1493,8 @@ class LetterBox:
Examples:
>>> transform = LetterBox(new_shape=(640, 640))
>>> result = transform(labels)
>>> resized_img = result['img']
>>> updated_instances = result['instances']
>>> resized_img = result["img"]
>>> updated_instances = result["instances"]
"""
def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, center=True, stride=32):
@ -1548,9 +1548,9 @@ class LetterBox:
Examples:
>>> letterbox = LetterBox(new_shape=(640, 640))
>>> result = letterbox(labels={'img': np.zeros((480, 640, 3)), 'instances': Instances(...)})
>>> resized_img = result['img']
>>> updated_instances = result['instances']
>>> result = letterbox(labels={"img": np.zeros((480, 640, 3)), "instances": Instances(...)})
>>> resized_img = result["img"]
>>> updated_instances = result["instances"]
"""
if labels is None:
labels = {}
@ -1616,7 +1616,7 @@ class LetterBox:
Examples:
>>> letterbox = LetterBox(new_shape=(640, 640))
>>> labels = {'instances': Instances(...)}
>>> labels = {"instances": Instances(...)}
>>> ratio = (0.5, 0.5)
>>> padw, padh = 10, 20
>>> updated_labels = letterbox._update_labels(labels, ratio, padw, padh)
@ -1643,7 +1643,7 @@ class CopyPaste:
Examples:
>>> copypaste = CopyPaste(p=0.5)
>>> augmented_labels = copypaste(labels)
>>> augmented_image = augmented_labels['img']
>>> augmented_image = augmented_labels["img"]
"""
def __init__(self, p=0.5) -> None:
@ -1680,7 +1680,7 @@ class CopyPaste:
(Dict): Dictionary with augmented image and updated instances under 'img', 'cls', and 'instances' keys.
Examples:
>>> labels = {'img': np.random.rand(640, 640, 3), 'cls': np.array([0, 1, 2]), 'instances': Instances(...)}
>>> labels = {"img": np.random.rand(640, 640, 3), "cls": np.array([0, 1, 2]), "instances": Instances(...)}
>>> augmenter = CopyPaste(p=0.5)
>>> augmented_labels = augmenter(labels)
"""
@ -1765,8 +1765,8 @@ class Albumentations:
Examples:
>>> transform = Albumentations(p=0.5)
>>> augmented = transform(image=image, bboxes=bboxes, class_labels=classes)
>>> augmented_image = augmented['image']
>>> augmented_bboxes = augmented['bboxes']
>>> augmented_image = augmented["image"]
>>> augmented_bboxes = augmented["bboxes"]
Notes:
- Requires Albumentations version 1.0.3 or higher.
@ -1871,7 +1871,7 @@ class Albumentations:
>>> labels = {
... "img": np.random.rand(640, 640, 3),
... "cls": np.array([0, 1]),
... "instances": Instances(bboxes=np.array([[0, 0, 1, 1], [0.5, 0.5, 0.8, 0.8]]))
... "instances": Instances(bboxes=np.array([[0, 0, 1, 1], [0.5, 0.5, 0.8, 0.8]])),
... }
>>> augmented = transform(labels)
>>> assert augmented["img"].shape == (640, 640, 3)
@ -1927,11 +1927,11 @@ class Format:
_format_segments: Converts polygon points to bitmap masks.
Examples:
>>> formatter = Format(bbox_format='xywh', normalize=True, return_mask=True)
>>> formatter = Format(bbox_format="xywh", normalize=True, return_mask=True)
>>> formatted_labels = formatter(labels)
>>> img = formatted_labels['img']
>>> bboxes = formatted_labels['bboxes']
>>> masks = formatted_labels['masks']
>>> img = formatted_labels["img"]
>>> bboxes = formatted_labels["bboxes"]
>>> masks = formatted_labels["masks"]
"""
def __init__(
@ -1975,7 +1975,7 @@ class Format:
bgr (float): The probability to return BGR images.
Examples:
>>> format = Format(bbox_format='xyxy', return_mask=True, return_keypoint=False)
>>> format = Format(bbox_format="xyxy", return_mask=True, return_keypoint=False)
>>> print(format.bbox_format)
xyxy
"""
@ -2013,8 +2013,8 @@ class Format:
- 'batch_idx': Batch index tensor (if batch_idx is True).
Examples:
>>> formatter = Format(bbox_format='xywh', normalize=True, return_mask=True)
>>> labels = {'img': np.random.rand(640, 640, 3), 'cls': np.array([0, 1]), 'instances': Instances(...)}
>>> formatter = Format(bbox_format="xywh", normalize=True, return_mask=True)
>>> labels = {"img": np.random.rand(640, 640, 3), "cls": np.array([0, 1]), "instances": Instances(...)}
>>> formatted_labels = formatter(labels)
>>> print(formatted_labels.keys())
"""
@ -2275,8 +2275,8 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
Examples:
>>> from ultralytics.data.dataset import YOLODataset
>>> dataset = YOLODataset(img_path='path/to/images', imgsz=640)
>>> hyp = {'mosaic': 1.0, 'copy_paste': 0.5, 'degrees': 10.0, 'translate': 0.2, 'scale': 0.9}
>>> dataset = YOLODataset(img_path="path/to/images", imgsz=640)
>>> hyp = {"mosaic": 1.0, "copy_paste": 0.5, "degrees": 10.0, "translate": 0.2, "scale": 0.9}
>>> transforms = v8_transforms(dataset, imgsz=640, hyp=hyp)
>>> augmented_data = transforms(dataset[0])
"""
@ -2343,7 +2343,7 @@ def classify_transforms(
Examples:
>>> transforms = classify_transforms(size=224)
>>> img = Image.open('path/to/image.jpg')
>>> img = Image.open("path/to/image.jpg")
>>> transformed_img = transforms(img)
"""
import torchvision.transforms as T # scope for faster 'import ultralytics'
@ -2415,7 +2415,7 @@ def classify_augmentations(
(torchvision.transforms.Compose): A composition of image augmentation transforms.
Examples:
>>> transforms = classify_augmentations(size=224, auto_augment='randaugment')
>>> transforms = classify_augmentations(size=224, auto_augment="randaugment")
>>> augmented_image = transforms(original_image)
"""
# Transforms to apply if Albumentations not installed

@ -298,10 +298,10 @@ class BaseDataset(Dataset):
im_file=im_file,
shape=shape, # format: (height, width)
cls=cls,
bboxes=bboxes, # xywh
bboxes=bboxes, # xywh
segments=segments, # xy
keypoints=keypoints, # xy
normalized=True, # or False
keypoints=keypoints, # xy
normalized=True, # or False
bbox_format="xyxy", # or xywh, ltwh
)
```

@ -115,7 +115,7 @@ def coco91_to_coco80_class():
def coco80_to_coco91_class():
"""
r"""
Converts 80-index (val2014) to 91-index (paper).
For details see https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/.
@ -123,8 +123,8 @@ def coco80_to_coco91_class():
```python
import numpy as np
a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
a = np.loadtxt("data/coco.names", dtype="str", delimiter="\n")
b = np.loadtxt("data/coco_paper.names", dtype="str", delimiter="\n")
x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco
x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet
```
@ -236,14 +236,13 @@ def convert_coco(
```python
from ultralytics.data.converter import convert_coco
convert_coco('../datasets/coco/annotations/', use_segments=True, use_keypoints=False, cls91to80=True)
convert_coco('../datasets/lvis/annotations/', use_segments=True, use_keypoints=False, cls91to80=False, lvis=True)
convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=True)
convert_coco("../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True)
```
Output:
Generates output files in the specified output directory.
"""
# Create dataset directory
save_dir = increment_path(save_dir) # increment if save directory already exists
for p in save_dir / "labels", save_dir / "images":
@ -351,7 +350,7 @@ def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
from ultralytics.data.converter import convert_segment_masks_to_yolo_seg
# The classes here is the total classes in the dataset, for COCO dataset we have 80 classes
convert_segment_masks_to_yolo_seg('path/to/masks_directory', 'path/to/output/directory', classes=80)
convert_segment_masks_to_yolo_seg("path/to/masks_directory", "path/to/output/directory", classes=80)
```
Notes:
@ -429,7 +428,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
```python
from ultralytics.data.converter import convert_dota_to_yolo_obb
convert_dota_to_yolo_obb('path/to/DOTA')
convert_dota_to_yolo_obb("path/to/DOTA")
```
Notes:

@ -163,7 +163,7 @@ class Explorer:
```python
exp = Explorer()
exp.create_embeddings_table()
similar = exp.query(img='https://ultralytics.com/images/zidane.jpg')
similar = exp.query(img="https://ultralytics.com/images/zidane.jpg")
```
"""
if self.table is None:
@ -226,6 +226,7 @@ class Explorer:
def plot_sql_query(self, query: str, labels: bool = True) -> Image.Image:
"""
Plot the results of a SQL-Like query on the table.
Args:
query (str): SQL query to run.
labels (bool): Whether to plot the labels or not.
@ -271,7 +272,7 @@ class Explorer:
```python
exp = Explorer()
exp.create_embeddings_table()
similar = exp.get_similar(img='https://ultralytics.com/images/zidane.jpg')
similar = exp.get_similar(img="https://ultralytics.com/images/zidane.jpg")
```
"""
assert return_type in {"pandas", "arrow"}, f"Return type should be `pandas` or `arrow`, but got {return_type}"
@ -306,7 +307,7 @@ class Explorer:
```python
exp = Explorer()
exp.create_embeddings_table()
similar = exp.plot_similar(img='https://ultralytics.com/images/zidane.jpg')
similar = exp.plot_similar(img="https://ultralytics.com/images/zidane.jpg")
```
"""
similar = self.get_similar(img, idx, limit, return_type="arrow")
@ -395,8 +396,8 @@ class Explorer:
exp.create_embeddings_table()
similarity_idx_plot = exp.plot_similarity_index()
similarity_idx_plot.show() # view image preview
similarity_idx_plot.save('path/to/save/similarity_index_plot.png') # save contents to file
similarity_idx_plot.show() # view image preview
similarity_idx_plot.save("path/to/save/similarity_index_plot.png") # save contents to file
```
"""
sim_idx = self.similarity_index(max_dist=max_dist, top_k=top_k, force=force)
@ -447,7 +448,7 @@ class Explorer:
```python
exp = Explorer()
exp.create_embeddings_table()
answer = exp.ask_ai('Show images with 1 person and 2 dogs')
answer = exp.ask_ai("Show images with 1 person and 2 dogs")
```
"""
result = prompt_sql_query(query)
@ -457,20 +458,3 @@ class Explorer:
LOGGER.error("AI generated query is not valid. Please try again with a different prompt")
LOGGER.error(e)
return None
def visualize(self, result):
"""
Visualize the results of a query. TODO.
Args:
result (pyarrow.Table): Table containing the results of a query.
"""
pass
def generate_report(self, result):
"""
Generate a report of the dataset.
TODO
"""
pass

@ -240,7 +240,7 @@ class LoadScreenshots:
return self
def __next__(self):
"""mss screen capture: get raw pixels from the screen as np array."""
"""Screen capture with 'mss' to get raw pixels from the screen as np array."""
im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR
s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "

@ -19,11 +19,19 @@ from shapely.geometry import Polygon
def bbox_iof(polygon1, bbox2, eps=1e-6):
"""
Calculate iofs between bbox1 and bbox2.
Calculate Intersection over Foreground (IoF) between polygons and bounding boxes.
Args:
polygon1 (np.ndarray): Polygon coordinates, (n, 8).
bbox2 (np.ndarray): Bounding boxes, (n ,4).
polygon1 (np.ndarray): Polygon coordinates, shape (n, 8).
bbox2 (np.ndarray): Bounding boxes, shape (n, 4).
eps (float, optional): Small value to prevent division by zero. Defaults to 1e-6.
Returns:
(np.ndarray): IoF scores, shape (n, 1) or (n, m) if bbox2 is (m, 4).
Note:
Polygon format: [x1, y1, x2, y2, x3, y3, x4, y4].
Bounding box format: [x_min, y_min, x_max, y_max].
"""
polygon1 = polygon1.reshape(-1, 4, 2)
lt_point = np.min(polygon1, axis=-2) # left-top
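To make the IoF definition concrete, a small independent check with shapely (the coordinates are illustrative, not taken from the repository's tests):
```python
import numpy as np
from shapely.geometry import Polygon, box

polygon1 = np.array([0, 0, 4, 0, 4, 4, 0, 4], dtype=float)  # [x1, y1, ..., x4, y4]
bbox2 = np.array([2, 2, 6, 6], dtype=float)  # [x_min, y_min, x_max, y_max]
p, b = Polygon(polygon1.reshape(4, 2)), box(*bbox2)
iof = p.intersection(b).area / (p.area + 1e-6)  # intersection over foreground (polygon) area
print(round(iof, 3))  # 0.25: a quarter of the polygon lies inside the box
```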

@ -265,7 +265,6 @@ def check_det_dataset(dataset, autodownload=True):
Returns:
(dict): Parsed dataset information and paths.
"""
file = check_file(dataset)
# Download (optional)
@ -363,7 +362,6 @@ def check_cls_dataset(dataset, split=""):
- 'nc' (int): The number of classes in the dataset.
- 'names' (dict): A dictionary of class names in the dataset.
"""
# Download (optional if dataset=https://file.zip is passed directly)
if str(dataset).startswith(("http:/", "https:/")):
dataset = safe_download(dataset, dir=DATASETS_DIR, unzip=True, delete=False)
@ -438,11 +436,11 @@ class HUBDatasetStats:
```python
from ultralytics.data.utils import HUBDatasetStats
stats = HUBDatasetStats('path/to/coco8.zip', task='detect') # detect dataset
stats = HUBDatasetStats('path/to/coco8-seg.zip', task='segment') # segment dataset
stats = HUBDatasetStats('path/to/coco8-pose.zip', task='pose') # pose dataset
stats = HUBDatasetStats('path/to/dota8.zip', task='obb') # OBB dataset
stats = HUBDatasetStats('path/to/imagenet10.zip', task='classify') # classification dataset
stats = HUBDatasetStats("path/to/coco8.zip", task="detect") # detect dataset
stats = HUBDatasetStats("path/to/coco8-seg.zip", task="segment") # segment dataset
stats = HUBDatasetStats("path/to/coco8-pose.zip", task="pose") # pose dataset
stats = HUBDatasetStats("path/to/dota8.zip", task="obb") # OBB dataset
stats = HUBDatasetStats("path/to/imagenet10.zip", task="classify") # classification dataset
stats.get_json(save=True)
stats.process_images()
@ -598,11 +596,10 @@ def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
from pathlib import Path
from ultralytics.data.utils import compress_one_image
for f in Path('path/to/dataset').rglob('*.jpg'):
for f in Path("path/to/dataset").rglob("*.jpg"):
compress_one_image(f)
```
"""
try: # use PIL
im = Image.open(f)
r = max_dim / max(im.height, im.width) # ratio
@ -635,7 +632,6 @@ def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annot
autosplit()
```
"""
path = Path(path) # images dir
files = sorted(x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS) # image files only
n = len(files) # number of files

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Export a YOLOv8 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit
Export a YOLOv8 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit.
Format | `format=argument` | Model
--- | --- | ---
@ -533,9 +533,7 @@ class Exporter:
@try_export
def export_ncnn(self, prefix=colorstr("NCNN:")):
"""
YOLOv8 NCNN export using PNNX https://github.com/pnnx/pnnx.
"""
"""YOLOv8 NCNN export using PNNX https://github.com/pnnx/pnnx."""
check_requirements("ncnn")
import ncnn # noqa
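For context, the NCNN exporter is reached through the standard export API; a minimal usage sketch (the weights name is illustrative):
```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
model.export(format="ncnn")  # converts via PNNX and writes an NCNN model directory
```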

@ -6,6 +6,7 @@ from typing import List, Union
import numpy as np
import torch
from PIL import Image
from ultralytics.cfg import TASK2DATA, get_cfg, get_save_dir
from ultralytics.engine.results import Results
@ -71,11 +72,11 @@ class Model(nn.Module):
Examples:
>>> from ultralytics import YOLO
>>> model = YOLO('yolov8n.pt')
>>> results = model.predict('image.jpg')
>>> model.train(data='coco128.yaml', epochs=3)
>>> model = YOLO("yolov8n.pt")
>>> results = model.predict("image.jpg")
>>> model.train(data="coco128.yaml", epochs=3)
>>> metrics = model.val()
>>> model.export(format='onnx')
>>> model.export(format="onnx")
"""
def __init__(
@ -143,7 +144,7 @@ class Model(nn.Module):
def __call__(
self,
source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
source: Union[str, Path, int, Image.Image, list, tuple, np.ndarray, torch.Tensor] = None,
stream: bool = False,
**kwargs,
) -> list:
@ -165,8 +166,8 @@ class Model(nn.Module):
Results object.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> results = model('https://ultralytics.com/images/bus.jpg')
>>> model = YOLO("yolov8n.pt")
>>> results = model("https://ultralytics.com/images/bus.jpg")
>>> for r in results:
... print(f"Detected {len(r)} objects in image")
"""
@ -187,9 +188,9 @@ class Model(nn.Module):
(bool): True if the model string is a valid Triton Server URL, False otherwise.
Examples:
>>> Model.is_triton_model('http://localhost:8000/v2/models/yolov8n')
>>> Model.is_triton_model("http://localhost:8000/v2/models/yolov8n")
True
>>> Model.is_triton_model('yolov8n.pt')
>>> Model.is_triton_model("yolov8n.pt")
False
"""
from urllib.parse import urlsplit
@ -252,7 +253,7 @@ class Model(nn.Module):
Examples:
>>> model = Model()
>>> model._new('yolov8n.yaml', task='detect', verbose=True)
>>> model._new("yolov8n.yaml", task="detect", verbose=True)
"""
cfg_dict = yaml_model_load(cfg)
self.cfg = cfg
@ -283,8 +284,8 @@ class Model(nn.Module):
Examples:
>>> model = Model()
>>> model._load('yolov8n.pt')
>>> model._load('path/to/weights.pth', task='detect')
>>> model._load("yolov8n.pt")
>>> model._load("path/to/weights.pth", task="detect")
"""
if weights.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://")):
weights = checks.check_file(weights, download_dir=SETTINGS["weights_dir"]) # download and return local file
@ -347,7 +348,7 @@ class Model(nn.Module):
AssertionError: If the model is not a PyTorch model.
Examples:
>>> model = Model('yolov8n.pt')
>>> model = Model("yolov8n.pt")
>>> model.reset_weights()
"""
self._check_is_pytorch_model()
@ -376,8 +377,8 @@ class Model(nn.Module):
Examples:
>>> model = Model()
>>> model.load('yolov8n.pt')
>>> model.load(Path('path/to/weights.pt'))
>>> model.load("yolov8n.pt")
>>> model.load(Path("path/to/weights.pt"))
"""
self._check_is_pytorch_model()
if isinstance(weights, (str, Path)):
@ -401,8 +402,8 @@ class Model(nn.Module):
AssertionError: If the model is not a PyTorch model.
Examples:
>>> model = Model('yolov8n.pt')
>>> model.save('my_model.pt')
>>> model = Model("yolov8n.pt")
>>> model.save("my_model.pt")
"""
self._check_is_pytorch_model()
from copy import deepcopy
@ -438,7 +439,7 @@ class Model(nn.Module):
TypeError: If the model is not a PyTorch model.
Examples:
>>> model = Model('yolov8n.pt')
>>> model = Model("yolov8n.pt")
>>> model.info() # Prints model summary
>>> info_list = model.info(detailed=True, verbose=False) # Returns detailed info as a list
"""
@ -493,8 +494,8 @@ class Model(nn.Module):
AssertionError: If the model is not a PyTorch model.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> image = 'https://ultralytics.com/images/bus.jpg'
>>> model = YOLO("yolov8n.pt")
>>> image = "https://ultralytics.com/images/bus.jpg"
>>> embeddings = model.embed(image)
>>> print(embeddings[0].shape)
"""
@ -504,7 +505,7 @@ class Model(nn.Module):
def predict(
self,
source: Union[str, Path, int, list, tuple, np.ndarray, torch.Tensor] = None,
source: Union[str, Path, int, Image.Image, list, tuple, np.ndarray, torch.Tensor] = None,
stream: bool = False,
predictor=None,
**kwargs,
@ -517,7 +518,7 @@ class Model(nn.Module):
types of image sources and can operate in a streaming mode.
Args:
source (str | Path | int | List[str] | List[Path] | List[int] | np.ndarray | torch.Tensor): The source
source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | List | Tuple): The source
of the image(s) to make predictions on. Accepts various types including file paths, URLs, PIL
images, numpy arrays, and torch tensors.
stream (bool): If True, treats the input source as a continuous stream for predictions.
@ -530,8 +531,8 @@ class Model(nn.Module):
Results object.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> results = model.predict(source='path/to/image.jpg', conf=0.25)
>>> model = YOLO("yolov8n.pt")
>>> results = model.predict(source="path/to/image.jpg", conf=0.25)
>>> for r in results:
... print(r.boxes.data) # print detection bounding boxes
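The widened `source` annotation admits PIL images directly; a minimal sketch of that input path (the image filename is an assumption for illustration):
```python
from PIL import Image

from ultralytics import YOLO

model = YOLO("yolov8n.pt")
im = Image.open("bus.jpg")  # a PIL.Image is now a valid prediction source
results = model.predict(im, conf=0.25)
print(len(results[0].boxes))
```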
@ -591,8 +592,8 @@ class Model(nn.Module):
AttributeError: If the predictor does not have registered trackers.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> results = model.track(source='path/to/video.mp4', show=True)
>>> model = YOLO("yolov8n.pt")
>>> results = model.track(source="path/to/video.mp4", show=True)
>>> for r in results:
... print(r.boxes.id) # print tracking IDs
@ -634,8 +635,8 @@ class Model(nn.Module):
AssertionError: If the model is not a PyTorch model.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> results = model.val(data='coco128.yaml', imgsz=640)
>>> model = YOLO("yolov8n.pt")
>>> results = model.val(data="coco128.yaml", imgsz=640)
>>> print(results.box.map) # Print mAP50-95
"""
custom = {"rect": True} # method defaults
@ -676,8 +677,8 @@ class Model(nn.Module):
AssertionError: If the model is not a PyTorch model.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> results = model.benchmark(data='coco8.yaml', imgsz=640, half=True)
>>> model = YOLO("yolov8n.pt")
>>> results = model.benchmark(data="coco8.yaml", imgsz=640, half=True)
>>> print(results)
"""
self._check_is_pytorch_model()
@ -726,8 +727,8 @@ class Model(nn.Module):
RuntimeError: If the export process fails due to errors.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> model.export(format='onnx', dynamic=True, simplify=True)
>>> model = YOLO("yolov8n.pt")
>>> model.export(format="onnx", dynamic=True, simplify=True)
'path/to/exported/model.onnx'
"""
self._check_is_pytorch_model()
@ -781,8 +782,8 @@ class Model(nn.Module):
ModuleNotFoundError: If the HUB SDK is not installed.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> results = model.train(data='coco128.yaml', epochs=3)
>>> model = YOLO("yolov8n.pt")
>>> results = model.train(data="coco128.yaml", epochs=3)
"""
self._check_is_pytorch_model()
if hasattr(self.session, "model") and self.session.model.id: # Ultralytics HUB session with loaded model
@ -846,7 +847,7 @@ class Model(nn.Module):
AssertionError: If the model is not a PyTorch model.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> model = YOLO("yolov8n.pt")
>>> results = model.tune(use_ray=True, iterations=20)
>>> print(results)
"""
@ -900,15 +901,15 @@ class Model(nn.Module):
initialized, it sets it up before retrieving the names.
Returns:
(List[str]): A list of class names associated with the model.
(Dict[int, str]): A dict of class names associated with the model.
Raises:
AttributeError: If the model or predictor does not have a 'names' attribute.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> model = YOLO("yolov8n.pt")
>>> print(model.names)
['person', 'bicycle', 'car', ...]
{0: 'person', 1: 'bicycle', 2: 'car', ...}
"""
from ultralytics.nn.autobackend import check_class_names
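Since `names` is documented as a dict keyed by integer class index, lookups use keys rather than list positions; a short sketch:
```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
print(model.names[0])  # 'person' -- dict access by class index, not list indexing
```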
@ -956,7 +957,7 @@ class Model(nn.Module):
(object | None): The transform object of the model if available, otherwise None.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> model = YOLO("yolov8n.pt")
>>> transforms = model.transforms
>>> if transforms:
... print(f"Model transforms: {transforms}")
@ -985,9 +986,9 @@ class Model(nn.Module):
Examples:
>>> def on_train_start(trainer):
... print("Training is starting!")
>>> model = YOLO('yolov8n.pt')
>>> model = YOLO("yolov8n.pt")
>>> model.add_callback("on_train_start", on_train_start)
>>> model.train(data='coco128.yaml', epochs=1)
>>> model.train(data="coco128.yaml", epochs=1)
"""
self.callbacks[event].append(func)
@ -1004,9 +1005,9 @@ class Model(nn.Module):
recognized by the Ultralytics callback system.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> model.add_callback('on_train_start', lambda: print('Training started'))
>>> model.clear_callback('on_train_start')
>>> model = YOLO("yolov8n.pt")
>>> model.add_callback("on_train_start", lambda: print("Training started"))
>>> model.clear_callback("on_train_start")
>>> # All callbacks for 'on_train_start' are now removed
Notes:
@ -1034,8 +1035,8 @@ class Model(nn.Module):
modifications, ensuring consistent behavior across different runs or experiments.
Examples:
>>> model = YOLO('yolov8n.pt')
>>> model.add_callback('on_train_start', custom_function)
>>> model = YOLO("yolov8n.pt")
>>> model.add_callback("on_train_start", custom_function)
>>> model.reset_callbacks()
# All callbacks are now reset to their default functions
"""
@ -1058,7 +1059,7 @@ class Model(nn.Module):
(dict): A new dictionary containing only the specified include keys from the input arguments.
Examples:
>>> original_args = {'imgsz': 640, 'data': 'coco.yaml', 'task': 'detect', 'batch': 16, 'epochs': 100}
>>> original_args = {"imgsz": 640, "data": "coco.yaml", "task": "detect", "batch": 16, "epochs": 100}
>>> reset_args = Model._reset_ckpt_args(original_args)
>>> print(reset_args)
{'imgsz': 640, 'data': 'coco.yaml', 'task': 'detect'}
@ -1089,9 +1090,9 @@ class Model(nn.Module):
NotImplementedError: If the specified key is not supported for the current task.
Examples:
>>> model = Model(task='detect')
>>> predictor = model._smart_load('predictor')
>>> trainer = model._smart_load('trainer')
>>> model = Model(task="detect")
>>> predictor = model._smart_load("predictor")
>>> trainer = model._smart_load("trainer")
Notes:
- This method is typically used internally by other methods of the Model class.
@ -1127,8 +1128,8 @@ class Model(nn.Module):
Examples:
>>> model = Model()
>>> task_map = model.task_map
>>> detect_class_map = task_map['detect']
>>> segment_class_map = task_map['segment']
>>> detect_class_map = task_map["detect"]
>>> segment_class_map = task_map["segment"]
Note:
The actual implementation of this method may vary depending on the specific tasks and

@ -384,7 +384,7 @@ class BasePredictor:
cv2.imwrite(save_path, im)
def show(self, p=""):
"""Display an image in a window using OpenCV imshow()."""
"""Display an image in a window using the OpenCV imshow function."""
im = self.plotted_img
if platform.system() == "Linux" and p not in self.windows:
self.windows.append(p)

@ -143,7 +143,7 @@ class BaseTensor(SimpleClass):
Examples:
>>> base_tensor = BaseTensor(torch.randn(3, 4), orig_shape=(480, 640))
>>> cuda_tensor = base_tensor.to('cuda')
>>> cuda_tensor = base_tensor.to("cuda")
>>> float16_tensor = base_tensor.to(dtype=torch.float16)
"""
return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)
@ -223,7 +223,7 @@ class Results(SimpleClass):
>>> for result in results:
... print(result.boxes) # Print detection boxes
... result.show() # Display the annotated image
... result.save(filename='result.jpg') # Save annotated image
... result.save(filename="result.jpg") # Save annotated image
"""
def __init__(
@ -280,7 +280,7 @@ class Results(SimpleClass):
(Results): A new Results object containing the specified subset of inference results.
Examples:
>>> results = model('path/to/image.jpg') # Perform inference
>>> results = model("path/to/image.jpg") # Perform inference
>>> single_result = results[0] # Get the first result
>>> subset_results = results[1:4] # Get a slice of results
"""
@ -319,7 +319,7 @@ class Results(SimpleClass):
obb (torch.Tensor | None): A tensor of shape (N, 5) containing oriented bounding box coordinates.
Examples:
>>> results = model('image.jpg')
>>> results = model("image.jpg")
>>> new_boxes = torch.tensor([[100, 100, 200, 200, 0.9, 0]])
>>> results[0].update(boxes=new_boxes)
"""
@ -370,7 +370,7 @@ class Results(SimpleClass):
(Results): A new Results object with all tensor attributes on CPU memory.
Examples:
>>> results = model('path/to/image.jpg') # Perform inference
>>> results = model("path/to/image.jpg") # Perform inference
>>> cpu_result = results[0].cpu() # Move the first result to CPU
>>> print(cpu_result.boxes.device) # Output: cpu
"""
@ -384,7 +384,7 @@ class Results(SimpleClass):
(Results): A new Results object with all tensors converted to numpy arrays.
Examples:
>>> results = model('path/to/image.jpg')
>>> results = model("path/to/image.jpg")
>>> numpy_result = results[0].numpy()
>>> type(numpy_result.boxes.data)
<class 'numpy.ndarray'>
@ -488,7 +488,7 @@ class Results(SimpleClass):
(np.ndarray): Annotated image as a numpy array.
Examples:
>>> results = model('image.jpg')
>>> results = model("image.jpg")
>>> for result in results:
... im = result.plot()
... im.show()
@ -578,7 +578,7 @@ class Results(SimpleClass):
**kwargs (Any): Arbitrary keyword arguments to be passed to the `plot()` method.
Examples:
>>> results = model('path/to/image.jpg')
>>> results = model("path/to/image.jpg")
>>> results[0].show() # Display the first result
>>> for result in results:
... result.show() # Display all results
@ -599,12 +599,12 @@ class Results(SimpleClass):
**kwargs (Any): Arbitrary keyword arguments to be passed to the `plot` method.
Examples:
>>> results = model('path/to/image.jpg')
>>> results = model("path/to/image.jpg")
>>> for result in results:
... result.save('annotated_image.jpg')
... result.save("annotated_image.jpg")
>>> # Or with custom plot arguments
>>> for result in results:
... result.save('annotated_image.jpg', conf=False, line_width=2)
... result.save("annotated_image.jpg", conf=False, line_width=2)
"""
if not filename:
filename = f"results_{Path(self.path).name}"
@ -623,7 +623,7 @@ class Results(SimpleClass):
number of detections per class. For classification tasks, it includes the top 5 class probabilities.
Examples:
>>> results = model('path/to/image.jpg')
>>> results = model("path/to/image.jpg")
>>> for result in results:
... print(result.verbose())
2 persons, 1 car, 3 traffic lights,
@ -660,7 +660,7 @@ class Results(SimpleClass):
Examples:
>>> from ultralytics import YOLO
>>> model = YOLO('yolov8n.pt')
>>> model = YOLO("yolov8n.pt")
>>> results = model("path/to/image.jpg")
>>> for result in results:
... result.save_txt("output.txt")
@ -757,7 +757,7 @@ class Results(SimpleClass):
task type (classification or detection) and available information (boxes, masks, keypoints).
Examples:
>>> results = model('image.jpg')
>>> results = model("image.jpg")
>>> summary = results[0].summary()
>>> print(summary)
"""
@ -919,7 +919,7 @@ class Boxes(BaseTensor):
coordinates in [x1, y1, x2, y2] format, where n is the number of boxes.
Examples:
>>> results = model('image.jpg')
>>> results = model("image.jpg")
>>> boxes = results[0].boxes
>>> xyxy = boxes.xyxy
>>> print(xyxy)
@ -953,7 +953,7 @@ class Boxes(BaseTensor):
The shape is (N,), where N is the number of boxes.
Examples:
>>> results = model('image.jpg')
>>> results = model("image.jpg")
>>> boxes = results[0].boxes
>>> class_ids = boxes.cls
>>> print(class_ids) # tensor([0., 2., 1.])
@ -970,7 +970,7 @@ class Boxes(BaseTensor):
otherwise None. Shape is (N,) where N is the number of boxes.
Examples:
>>> results = model.track('path/to/video.mp4')
>>> results = model.track("path/to/video.mp4")
>>> for result in results:
... boxes = result.boxes
... if boxes.is_track:
@ -992,8 +992,8 @@ class Boxes(BaseTensor):
Convert bounding boxes from [x1, y1, x2, y2] format to [x, y, width, height] format.
Returns:
(torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format, where x, y are the coordinates of
the top-left corner of the bounding box, width, height are the dimensions of the bounding box and the
(torch.Tensor | numpy.ndarray): Boxes in [x_center, y_center, width, height] format, where x_center, y_center are the coordinates of
the center point of the bounding box, width, height are the dimensions of the bounding box and the
shape of the returned tensor is (N, 4), where N is the number of boxes.
Examples:
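The corrected docstring describes the xyxy-to-center-format conversion. A short worked example using `ultralytics.utils.ops.xyxy2xywh`, with made-up coordinates:

```python
import torch
from ultralytics.utils import ops

# One box with corners (100, 100) and (200, 300) in xyxy format
xyxy = torch.tensor([[100.0, 100.0, 200.0, 300.0]])
xywh = ops.xyxy2xywh(xyxy)
print(xywh)  # tensor([[150., 200., 100., 200.]]) -> center (150, 200), w=100, h=200
```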
@ -1116,7 +1116,7 @@ class Masks(BaseTensor):
mask contour.
Examples:
>>> results = model('image.jpg')
>>> results = model("image.jpg")
>>> masks = results[0].masks
>>> normalized_coords = masks.xyn
>>> print(normalized_coords[0]) # Normalized coordinates of the first mask
@ -1141,7 +1141,7 @@ class Masks(BaseTensor):
number of points in the segment.
Examples:
>>> results = model('image.jpg')
>>> results = model("image.jpg")
>>> masks = results[0].masks
>>> xy_coords = masks.xy
>>> print(len(xy_coords)) # Number of masks
@ -1223,7 +1223,7 @@ class Keypoints(BaseTensor):
the number of detections and K is the number of keypoints per detection.
Examples:
>>> results = model('image.jpg')
>>> results = model("image.jpg")
>>> keypoints = results[0].keypoints
>>> xy = keypoints.xy
>>> print(xy.shape) # (N, K, 2)
@ -1388,7 +1388,7 @@ class Probs(BaseTensor):
(torch.Tensor | numpy.ndarray): A tensor containing the confidence score of the top 1 class.
Examples:
>>> results = model('image.jpg') # classify an image
>>> results = model("image.jpg") # classify an image
>>> probs = results[0].probs # get classification probabilities
>>> top1_confidence = probs.top1conf # get confidence of top 1 class
>>> print(f"Top 1 class confidence: {top1_confidence.item():.4f}")
@ -1410,7 +1410,7 @@ class Probs(BaseTensor):
top 5 predicted classes, sorted in descending order of probability.
Examples:
>>> results = model('image.jpg')
>>> results = model("image.jpg")
>>> probs = results[0].probs
>>> top5_conf = probs.top5conf
>>> print(top5_conf) # Prints confidence scores for top 5 classes
@ -1497,7 +1497,7 @@ class OBB(BaseTensor):
[x_center, y_center, width, height, rotation]. The shape is (N, 5) where N is the number of boxes.
Examples:
>>> results = model('image.jpg')
>>> results = model("image.jpg")
>>> obb = results[0].obb
>>> xywhr = obb.xywhr
>>> print(xywhr.shape)
@ -1518,7 +1518,7 @@ class OBB(BaseTensor):
for N detections, where each score is in the range [0, 1].
Examples:
>>> results = model('image.jpg')
>>> results = model("image.jpg")
>>> obb_result = results[0].obb
>>> confidence_scores = obb_result.conf
>>> print(confidence_scores)
@ -1535,7 +1535,7 @@ class OBB(BaseTensor):
bounding box. The shape is (N,), where N is the number of boxes.
Examples:
>>> results = model('image.jpg')
>>> results = model("image.jpg")
>>> result = results[0]
>>> obb = result.obb
>>> class_values = obb.cls
@ -1553,7 +1553,7 @@ class OBB(BaseTensor):
oriented bounding box. Returns None if tracking IDs are not available.
Examples:
>>> results = model('image.jpg', tracker=True) # Run inference with tracking
>>> results = model("image.jpg", tracker=True) # Run inference with tracking
>>> for result in results:
... if result.obb is not None:
... track_ids = result.obb.id
@ -1620,8 +1620,8 @@ class OBB(BaseTensor):
Examples:
>>> import torch
>>> from ultralytics import YOLO
>>> model = YOLO('yolov8n-obb.pt')
>>> results = model('path/to/image.jpg')
>>> model = YOLO("yolov8n-obb.pt")
>>> results = model("path/to/image.jpg")
>>> for result in results:
... obb = result.obb
... if obb is not None:

@ -56,8 +56,6 @@ from ultralytics.utils.torch_utils import (
class BaseTrainer:
"""
BaseTrainer.
A base class for creating trainers.
Attributes:
@ -230,7 +228,6 @@ class BaseTrainer:
def _setup_train(self, world_size):
"""Builds dataloaders and optimizer on correct rank process."""
# Model
self.run_callbacks("on_pretrain_routine_start")
ckpt = self.setup_model()
@ -478,12 +475,16 @@ class BaseTrainer:
torch.cuda.empty_cache()
self.run_callbacks("teardown")
def read_results_csv(self):
"""Read results.csv into a dict using pandas."""
import pandas as pd # scope for faster 'import ultralytics'
return {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()}
def save_model(self):
"""Save model training checkpoints with additional metadata."""
import io
import pandas as pd # scope for faster 'import ultralytics'
# Serialize ckpt to a byte buffer once (faster than repeated torch.save() calls)
buffer = io.BytesIO()
torch.save(
@ -496,7 +497,7 @@ class BaseTrainer:
"optimizer": convert_optimizer_state_dict_to_fp16(deepcopy(self.optimizer.state_dict())),
"train_args": vars(self.args), # save as dict
"train_metrics": {**self.metrics, **{"fitness": self.fitness}},
"train_results": {k.strip(): v for k, v in pd.read_csv(self.csv).to_dict(orient="list").items()},
"train_results": self.read_results_csv(),
"date": datetime.now().isoformat(),
"version": __version__,
"license": "AGPL-3.0 (https://ultralytics.com/license)",
@ -636,7 +637,7 @@ class BaseTrainer:
pass
def on_plot(self, name, data=None):
"""Registers plots (e.g. to be consumed in callbacks)"""
"""Registers plots (e.g. to be consumed in callbacks)."""
path = Path(name)
self.plots[path] = {"data": data, "timestamp": time.time()}
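Returning to the `read_results_csv()` helper introduced above: it strips padded CSV headers into column lists so `save_model()` can reuse the result. A standalone sketch of the same pandas pattern on an in-memory results.csv:

```python
import io

import pandas as pd

# results.csv as the trainer writes it: headers may carry padding whitespace
csv_text = "epoch,         train/box_loss,      metrics/mAP50(B)\n1,0.95,0.41\n2,0.88,0.47\n"
df = pd.read_csv(io.StringIO(csv_text))

# Same pattern as read_results_csv(): strip header whitespace, keep column lists
train_results = {k.strip(): v for k, v in df.to_dict(orient="list").items()}
print(train_results["metrics/mAP50(B)"])  # [0.41, 0.47]
```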
@ -646,6 +647,9 @@ class BaseTrainer:
if f.exists():
strip_optimizer(f) # strip optimizers
if f is self.best:
if self.last.is_file(): # update best.pt train_metrics from last.pt
k = "train_results"
torch.save({**torch.load(self.best), **{k: torch.load(self.last)[k]}}, self.best)
LOGGER.info(f"\nValidating {f}...")
self.validator.args.plots = self.args.plots
self.metrics = self.validator(model=f)
@ -732,7 +736,6 @@ class BaseTrainer:
Returns:
(torch.optim.Optimizer): The constructed optimizer.
"""
g = [], [], [] # optimizer parameter groups
bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k) # normalization layers, i.e. BatchNorm2d()
if name == "auto":

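The hunk above enters `build_optimizer`, which splits parameters into groups so that normalization weights and biases skip weight decay. A hedged sketch of that standard three-way split on a toy model:

```python
import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU(), nn.Conv2d(8, 4, 1))

g = [], [], []  # decayed weights, norm-layer weights (no decay), biases
bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k)  # BatchNorm2d, LayerNorm, ...
for module in model.modules():
    for name, p in module.named_parameters(recurse=False):
        if name == "bias":
            g[2].append(p)
        elif isinstance(module, bn):  # norm weights: no weight decay
            g[1].append(p)
        else:
            g[0].append(p)

optimizer = torch.optim.SGD(g[2], lr=0.01, momentum=0.9, nesterov=True)
optimizer.add_param_group({"params": g[0], "weight_decay": 5e-4})
optimizer.add_param_group({"params": g[1], "weight_decay": 0.0})
print([len(group["params"]) for group in optimizer.param_groups])
```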
@ -1,7 +1,7 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
This module provides functionalities for hyperparameter tuning of the Ultralytics YOLO models for object detection,
instance segmentation, image classification, pose estimation, and multi-object tracking.
Module provides functionalities for hyperparameter tuning of the Ultralytics YOLO models for object detection, instance
segmentation, image classification, pose estimation, and multi-object tracking.
Hyperparameter tuning is the process of systematically searching for the optimal set of hyperparameters
that yield the best model performance. This is particularly crucial in deep learning models like YOLO,
@ -12,8 +12,8 @@ Example:
```python
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
model = YOLO("yolov8n.pt")
model.tune(data="coco8.yaml", epochs=10, iterations=300, optimizer="AdamW", plots=False, save=False, val=False)
```
"""
@ -54,15 +54,15 @@ class Tuner:
```python
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
model.tune(data='coco8.yaml', epochs=10, iterations=300, optimizer='AdamW', plots=False, save=False, val=False)
model = YOLO("yolov8n.pt")
model.tune(data="coco8.yaml", epochs=10, iterations=300, optimizer="AdamW", plots=False, save=False, val=False)
```
Tune with custom search space.
```python
from ultralytics import YOLO
model = YOLO('yolov8n.pt')
model = YOLO("yolov8n.pt")
model.tune(space={key1: val1, key2: val2}) # custom search space dictionary
```
"""
@ -176,7 +176,6 @@ class Tuner:
The method utilizes the `self.tune_csv` Path object to read and log hyperparameters and fitness scores.
Ensure this path is set correctly in the Tuner instance.
"""
t0 = time.time()
best_save_dir, best_metrics = None, None
(self.tune_dir / "weights").mkdir(parents=True, exist_ok=True)

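The custom search-space example above uses placeholder keys. A hedged concrete version, assuming each entry is a `(min, max)` range as in the default `Tuner` space; the specific keys and bounds are illustrative:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
# Hypothetical concrete space: (min, max) ranges per hyperparameter, mirroring
# the shape of the default Tuner search space
space = {
    "lr0": (1e-5, 1e-1),  # initial learning rate
    "momentum": (0.6, 0.98),  # SGD momentum
    "weight_decay": (0.0, 1e-3),  # optimizer weight decay
}
model.tune(data="coco8.yaml", epochs=10, iterations=50, space=space, plots=False, save=False, val=False)
```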
@ -104,9 +104,7 @@ class BaseValidator:
@smart_inference_mode()
def __call__(self, trainer=None, model=None):
"""Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer
gets priority).
"""
"""Executes validation process, running inference on dataloader and computing performance metrics."""
self.training = trainer is not None
augment = self.args.augment and (not self.training)
if self.training:
@ -280,7 +278,7 @@ class BaseValidator:
return batch
def postprocess(self, preds):
"""Describes and summarizes the purpose of 'postprocess()' but no details mentioned."""
"""Postprocesses the predictions."""
return preds
def init_metrics(self, model):
@ -317,7 +315,7 @@ class BaseValidator:
return []
def on_plot(self, name, data=None):
"""Registers plots (e.g. to be consumed in callbacks)"""
"""Registers plots (e.g. to be consumed in callbacks)."""
self.plots[Path(name)] = {"data": data, "timestamp": time.time()}
# TODO: may need to put these following functions into callback

@ -136,11 +136,11 @@ def check_dataset(path: str, task: str) -> None:
```python
from ultralytics.hub import check_dataset
check_dataset('path/to/coco8.zip', task='detect') # detect dataset
check_dataset('path/to/coco8-seg.zip', task='segment') # segment dataset
check_dataset('path/to/coco8-pose.zip', task='pose') # pose dataset
check_dataset('path/to/dota8.zip', task='obb') # OBB dataset
check_dataset('path/to/imagenet10.zip', task='classify') # classification dataset
check_dataset("path/to/coco8.zip", task="detect") # detect dataset
check_dataset("path/to/coco8-seg.zip", task="segment") # segment dataset
check_dataset("path/to/coco8-pose.zip", task="pose") # pose dataset
check_dataset("path/to/dota8.zip", task="obb") # OBB dataset
check_dataset("path/to/imagenet10.zip", task="classify") # classification dataset
```
"""
HUBDatasetStats(path=path, task=task).get_json()

@ -27,10 +27,14 @@ class Auth:
def __init__(self, api_key="", verbose=False):
"""
Initialize the Auth class with an optional API key.
Initialize Auth class and authenticate user.
Handles API key validation, Google Colab authentication, and new key requests. Updates SETTINGS upon successful
authentication.
Args:
api_key (str, optional): May be an API key or a combination API key and model ID, i.e. key_id
api_key (str): API key or combined key_id format.
verbose (bool): Enable verbose logging.
"""
# Split the input API key in case it contains a combined key_model and keep only the API key part
api_key = api_key.split("_")[0]

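The Auth hunk documents the combined `key_id` format and splits off the model ID. A tiny worked example with made-up values:

```python
# The combined format is "<api_key>_<model_id>"; keeping the first "_" segment
# recovers the bare API key (values below are made up)
combined = "abc123def456_98765"
api_key = combined.split("_")[0]
print(api_key)  # abc123def456
```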
@ -1,5 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import shutil
import threading
import time
from http import HTTPStatus
@ -158,7 +159,6 @@ class HUBTrainingSession:
Raises:
HUBModelError: If the identifier format is not recognized.
"""
# Initialize variables
api_key, model_id, filename = None, None, None
@ -199,7 +199,6 @@ class HUBTrainingSession:
ValueError: If the model is already trained, if required dataset information is missing, or if there are
issues with the provided training arguments.
"""
if self.model.is_resumable():
# Model has saved weights
self.train_args = {"data": self.model.get_dataset_url(), "resume": True}
@ -275,7 +274,7 @@ class HUBTrainingSession:
# if request related to metrics upload and exceed retries
if response is None and kwargs.get("metrics"):
self.metrics_upload_failed_queue.update(kwargs.get("metrics", None))
self.metrics_upload_failed_queue.update(kwargs.get("metrics"))
return response
@ -344,23 +343,34 @@ class HUBTrainingSession:
map (float): Mean average precision of the model.
final (bool): Indicates if the model is the final model after training.
"""
if Path(weights).is_file():
progress_total = Path(weights).stat().st_size if final else None # Only show progress if final
self.request_queue(
self.model.upload_model,
epoch=epoch,
weights=weights,
is_best=is_best,
map=map,
final=final,
retry=10,
timeout=3600,
thread=not final,
progress_total=progress_total,
stream_response=True,
)
else:
LOGGER.warning(f"{PREFIX}WARNING ⚠ Model upload issue. Missing model {weights}.")
weights = Path(weights)
if not weights.is_file():
last = weights.with_name("last" + weights.suffix)
if final and last.is_file():
LOGGER.warning(
f"{PREFIX} WARNING ⚠ Model 'best.pt' not found, copying 'last.pt' to 'best.pt' and uploading. "
"This often happens when resuming training in transient environments like Google Colab. "
"For more reliable training, consider using Ultralytics HUB Cloud. "
"Learn more at https://docs.ultralytics.com/hub/cloud-training."
)
shutil.copy(last, weights) # copy last.pt to best.pt
else:
LOGGER.warning(f"{PREFIX} WARNING ⚠ Model upload issue. Missing model {weights}.")
return
self.request_queue(
self.model.upload_model,
epoch=epoch,
weights=str(weights),
is_best=is_best,
map=map,
final=final,
retry=10,
timeout=3600,
thread=not final,
progress_total=weights.stat().st_size if final else None, # only show progress if final
stream_response=True,
)
@staticmethod
def _show_upload_progress(content_length: int, response: requests.Response) -> None:

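The rewritten `upload_model` falls back to `last.pt` when `best.pt` is missing at final upload. A self-contained sketch of that resolution logic, with `print` standing in for the logger; the function name is hypothetical:

```python
import shutil
from pathlib import Path
from typing import Optional


def resolve_final_weights(weights: str) -> Optional[Path]:
    """Return an uploadable checkpoint path, copying last.pt to best.pt if needed."""
    path = Path(weights)
    if path.is_file():
        return path
    last = path.with_name("last" + path.suffix)
    if last.is_file():
        print(f"WARNING: {path.name} not found, copying {last.name} to {path.name}")
        shutil.copy(last, path)  # same fallback as the hunk above
        return path
    print(f"WARNING: model upload issue, missing {path}")
    return None
```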
@ -16,8 +16,8 @@ class FastSAM(Model):
```python
from ultralytics import FastSAM
model = FastSAM('last.pt')
results = model.predict('ultralytics/assets/bus.jpg')
model = FastSAM("last.pt")
results = model.predict("ultralytics/assets/bus.jpg")
```
"""
@ -30,18 +30,21 @@ class FastSAM(Model):
def predict(self, source, stream=False, bboxes=None, points=None, labels=None, texts=None, **kwargs):
"""
Performs segmentation prediction on the given image or video source.
Perform segmentation prediction on image or video source.
Supports prompted segmentation with bounding boxes, points, labels, and texts.
Args:
source (str): Path to the image or video file, or a PIL.Image object, or a numpy.ndarray object.
stream (bool, optional): If True, enables real-time streaming. Defaults to False.
bboxes (list, optional): List of bounding box coordinates for prompted segmentation. Defaults to None.
points (list, optional): List of points for prompted segmentation. Defaults to None.
labels (list, optional): List of labels for prompted segmentation. Defaults to None.
texts (list, optional): List of texts for prompted segmentation. Defaults to None.
source (str | PIL.Image | numpy.ndarray): Input source.
stream (bool): Enable real-time streaming.
bboxes (list): Bounding box coordinates for prompted segmentation.
points (list): Points for prompted segmentation.
labels (list): Labels for prompted segmentation.
texts (list): Texts for prompted segmentation.
**kwargs (Any): Additional keyword arguments.
Returns:
(list): The model predictions.
(list): Model predictions.
"""
prompts = dict(bboxes=bboxes, points=points, labels=labels, texts=texts)
return super().predict(source, stream, prompts=prompts, **kwargs)

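Building on the revised `predict` signature, a hedged prompted-segmentation call; the checkpoint name and prompt coordinates are illustrative:

```python
from ultralytics import FastSAM

model = FastSAM("FastSAM-s.pt")  # assumes the checkpoint is available locally

# Box and point prompts per the signature above; coordinates are illustrative
results = model.predict(
    "ultralytics/assets/bus.jpg",
    bboxes=[[100, 100, 400, 500]],  # xyxy box prompt
    points=[[250, 300]],  # point prompt at (x, y)
    labels=[1],  # 1 = positive point
)
print(results[0].masks)  # segmentation masks for the prompted regions
```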
@ -92,8 +92,8 @@ class FastSAMPredictor(SegmentationPredictor):
if labels.sum() == 0 # all negative points
else torch.zeros(len(result), dtype=torch.bool, device=self.device)
)
for p, l in zip(points, labels):
point_idx[torch.nonzero(masks[:, p[1], p[0]], as_tuple=True)[0]] = True if l else False
for point, label in zip(points, labels):
point_idx[torch.nonzero(masks[:, point[1], point[0]], as_tuple=True)[0]] = True if label else False
idx |= point_idx
if texts is not None:
if isinstance(texts, str):

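The renamed loop above marks masks that contain each prompt point via `masks[:, y, x]` indexing. A minimal torch sketch of the same selection on toy masks:

```python
import torch

# Two 4x4 binary masks; masks[:, y, x] reads every mask's value at one pixel
masks = torch.zeros(2, 4, 4, dtype=torch.bool)
masks[0, 1, 2] = True  # mask 0 covers pixel (x=2, y=1)

point, label = (2, 1), 1  # positive point at x=2, y=1
point_idx = torch.zeros(len(masks), dtype=torch.bool)
hit = torch.nonzero(masks[:, point[1], point[0]], as_tuple=True)[0]  # masks containing the point
point_idx[hit] = bool(label)
print(point_idx)  # tensor([ True, False])
```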
@ -13,7 +13,6 @@ def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
Returns:
adjusted_boxes (torch.Tensor): adjusted bounding boxes
"""
# Image dimensions
h, w = image_shape

@ -6,8 +6,8 @@ Example:
```python
from ultralytics import NAS
model = NAS('yolo_nas_s')
results = model.predict('ultralytics/assets/bus.jpg')
model = NAS("yolo_nas_s")
results = model.predict("ultralytics/assets/bus.jpg")
```
"""
@ -34,8 +34,8 @@ class NAS(Model):
```python
from ultralytics import NAS
model = NAS('yolo_nas_s')
results = model.predict('ultralytics/assets/bus.jpg')
model = NAS("yolo_nas_s")
results = model.predict("ultralytics/assets/bus.jpg")
```
Attributes:

@ -22,7 +22,7 @@ class NASPredictor(BasePredictor):
```python
from ultralytics import NAS
model = NAS('yolo_nas_s')
model = NAS("yolo_nas_s")
predictor = model.predictor
# Assumes that raw_preds, img, orig_imgs are available
results = predictor.postprocess(raw_preds, img, orig_imgs)
@ -34,7 +34,6 @@ class NASPredictor(BasePredictor):
def postprocess(self, preds_in, img, orig_imgs):
"""Postprocess predictions and returns a list of Results objects."""
# Cat boxes and class scores
boxes = ops.xyxy2xywh(preds_in[0][0])
preds = torch.cat((boxes, preds_in[0][1]), -1).permute(0, 2, 1)

@ -24,7 +24,7 @@ class NASValidator(DetectionValidator):
```python
from ultralytics import NAS
model = NAS('yolo_nas_s')
model = NAS("yolo_nas_s")
validator = model.validator
# Assumes that raw_preds are available
final_preds = validator.postprocess(raw_preds)

@ -21,7 +21,7 @@ class RTDETRPredictor(BasePredictor):
from ultralytics.utils import ASSETS
from ultralytics.models.rtdetr import RTDETRPredictor
args = dict(model='rtdetr-l.pt', source=ASSETS)
args = dict(model="rtdetr-l.pt", source=ASSETS)
predictor = RTDETRPredictor(overrides=args)
predictor.predict_cli()
```

@ -25,7 +25,7 @@ class RTDETRTrainer(DetectionTrainer):
```python
from ultralytics.models.rtdetr.train import RTDETRTrainer
args = dict(model='rtdetr-l.yaml', data='coco8.yaml', imgsz=640, epochs=3)
args = dict(model="rtdetr-l.yaml", data="coco8.yaml", imgsz=640, epochs=3)
trainer = RTDETRTrainer(overrides=args)
trainer.train()
```

@ -62,7 +62,7 @@ class RTDETRValidator(DetectionValidator):
```python
from ultralytics.models.rtdetr import RTDETRValidator
args = dict(model='rtdetr-l.pt', data='coco8.yaml')
args = dict(model="rtdetr-l.pt", data="coco8.yaml")
validator = RTDETRValidator(args=args)
validator()
```

@ -41,8 +41,8 @@ class SAM(Model):
info: Logs information about the SAM model.
Examples:
>>> sam = SAM('sam_b.pt')
>>> results = sam.predict('image.jpg', points=[[500, 375]])
>>> sam = SAM("sam_b.pt")
>>> results = sam.predict("image.jpg", points=[[500, 375]])
>>> for r in results:
... print(f"Detected {len(r.masks)} masks")
"""
@ -58,7 +58,7 @@ class SAM(Model):
NotImplementedError: If the model file extension is not .pt or .pth.
Examples:
>>> sam = SAM('sam_b.pt')
>>> sam = SAM("sam_b.pt")
>>> print(sam.is_sam2)
"""
if model and Path(model).suffix not in {".pt", ".pth"}:
@ -78,8 +78,8 @@ class SAM(Model):
task (str | None): Task name. If provided, it specifies the particular task the model is being loaded for.
Examples:
>>> sam = SAM('sam_b.pt')
>>> sam._load('path/to/custom_weights.pt')
>>> sam = SAM("sam_b.pt")
>>> sam._load("path/to/custom_weights.pt")
"""
self.model = build_sam(weights)
@ -100,8 +100,8 @@ class SAM(Model):
(List): The model predictions.
Examples:
>>> sam = SAM('sam_b.pt')
>>> results = sam.predict('image.jpg', points=[[500, 375]])
>>> sam = SAM("sam_b.pt")
>>> results = sam.predict("image.jpg", points=[[500, 375]])
>>> for r in results:
... print(f"Detected {len(r.masks)} masks")
"""
@ -130,8 +130,8 @@ class SAM(Model):
(List): The model predictions, typically containing segmentation masks and other relevant information.
Examples:
>>> sam = SAM('sam_b.pt')
>>> results = sam('image.jpg', points=[[500, 375]])
>>> sam = SAM("sam_b.pt")
>>> results = sam("image.jpg", points=[[500, 375]])
>>> print(f"Detected {len(results[0].masks)} masks")
"""
return self.predict(source, stream, bboxes, points, labels, **kwargs)
@ -151,7 +151,7 @@ class SAM(Model):
(Tuple): A tuple containing the model's information (string representations of the model).
Examples:
>>> sam = SAM('sam_b.pt')
>>> sam = SAM("sam_b.pt")
>>> info = sam.info()
>>> print(info[0]) # Print summary information
"""
@ -167,7 +167,7 @@ class SAM(Model):
class. For SAM2 models, it maps to SAM2Predictor, otherwise to the standard Predictor.
Examples:
>>> sam = SAM('sam_b.pt')
>>> sam = SAM("sam_b.pt")
>>> task_map = sam.task_map
>>> print(task_map)
{'segment': <class 'ultralytics.models.sam.predict.Predictor'>}

@ -32,8 +32,9 @@ class MaskDecoder(nn.Module):
Examples:
>>> decoder = MaskDecoder(transformer_dim=256, transformer=transformer_module)
>>> masks, iou_pred = decoder(image_embeddings, image_pe, sparse_prompt_embeddings,
... dense_prompt_embeddings, multimask_output=True)
>>> masks, iou_pred = decoder(
... image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, multimask_output=True
... )
>>> print(f"Predicted masks shape: {masks.shape}, IoU predictions shape: {iou_pred.shape}")
"""
@ -213,7 +214,8 @@ class SAM2MaskDecoder(nn.Module):
>>> dense_prompt_embeddings = torch.rand(1, 256, 64, 64)
>>> decoder = SAM2MaskDecoder(256, transformer)
>>> masks, iou_pred, sam_tokens_out, obj_score_logits = decoder.forward(
... image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, True, False)
... image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, True, False
... )
"""
def __init__(
@ -345,7 +347,8 @@ class SAM2MaskDecoder(nn.Module):
>>> dense_prompt_embeddings = torch.rand(1, 256, 64, 64)
>>> decoder = SAM2MaskDecoder(256, transformer)
>>> masks, iou_pred, sam_tokens_out, obj_score_logits = decoder.forward(
... image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, True, False)
... image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, True, False
... )
"""
masks, iou_pred, mask_tokens_out, object_score_logits = self.predict_masks(
image_embeddings=image_embeddings,

@ -417,7 +417,15 @@ class SAM2Model(torch.nn.Module):
>>> point_inputs = {"point_coords": torch.rand(1, 2, 2), "point_labels": torch.tensor([[1, 0]])}
>>> mask_inputs = torch.rand(1, 1, 512, 512)
>>> results = model._forward_sam_heads(backbone_features, point_inputs, mask_inputs)
>>> low_res_multimasks, high_res_multimasks, ious, low_res_masks, high_res_masks, obj_ptr, object_score_logits = results
>>> (
... low_res_multimasks,
... high_res_multimasks,
... ious,
... low_res_masks,
... high_res_masks,
... obj_ptr,
... object_score_logits,
... ) = results
"""
B = backbone_features.size(0)
device = backbone_features.device

@ -716,7 +716,7 @@ class BasicLayer(nn.Module):
Examples:
>>> layer = BasicLayer(dim=96, input_resolution=(56, 56), depth=2, num_heads=3, window_size=7)
>>> x = torch.randn(1, 56*56, 96)
>>> x = torch.randn(1, 56 * 56, 96)
>>> output = layer(x)
>>> print(output.shape)
"""

@ -232,7 +232,6 @@ class TwoWayAttentionBlock(nn.Module):
def forward(self, queries: Tensor, keys: Tensor, query_pe: Tensor, key_pe: Tensor) -> Tuple[Tensor, Tensor]:
"""Applies two-way attention to process query and key embeddings in a transformer block."""
# Self attention block
if self.skip_first_layer_pe:
queries = self.self_attn(q=queries, k=queries, v=queries)
@ -353,7 +352,6 @@ class Attention(nn.Module):
def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
"""Applies multi-head attention to query, key, and value tensors with optional downsampling."""
# Input projections
q = self.q_proj(q)
k = self.k_proj(k)

@ -22,7 +22,7 @@ def select_closest_cond_frames(frame_idx, cond_frame_outputs, max_cond_frame_num
Examples:
>>> frame_idx = 5
>>> cond_frame_outputs = {1: 'a', 3: 'b', 7: 'c', 9: 'd'}
>>> cond_frame_outputs = {1: "a", 3: "b", 7: "c", 9: "d"}
>>> max_cond_frame_num = 2
>>> selected, unselected = select_closest_cond_frames(frame_idx, cond_frame_outputs, max_cond_frame_num)
>>> print(selected)

@ -69,8 +69,8 @@ class Predictor(BasePredictor):
Examples:
>>> predictor = Predictor()
>>> predictor.setup_model(model_path='sam_model.pt')
>>> predictor.set_image('image.jpg')
>>> predictor.setup_model(model_path="sam_model.pt")
>>> predictor.set_image("image.jpg")
>>> masks, scores, boxes = predictor.generate()
>>> results = predictor.postprocess((masks, scores, boxes), im, orig_img)
"""
@ -90,8 +90,8 @@ class Predictor(BasePredictor):
Examples:
>>> predictor = Predictor(cfg=DEFAULT_CFG)
>>> predictor = Predictor(overrides={'imgsz': 640})
>>> predictor = Predictor(_callbacks={'on_predict_start': custom_callback})
>>> predictor = Predictor(overrides={"imgsz": 640})
>>> predictor = Predictor(_callbacks={"on_predict_start": custom_callback})
"""
if overrides is None:
overrides = {}
@ -188,8 +188,8 @@ class Predictor(BasePredictor):
Examples:
>>> predictor = Predictor()
>>> predictor.setup_model(model_path='sam_model.pt')
>>> predictor.set_image('image.jpg')
>>> predictor.setup_model(model_path="sam_model.pt")
>>> predictor.set_image("image.jpg")
>>> masks, scores, logits = predictor.inference(im, bboxes=[[0, 0, 100, 100]])
"""
# Override prompts if any stored in self.prompts
@ -475,8 +475,8 @@ class Predictor(BasePredictor):
Examples:
>>> predictor = Predictor()
>>> predictor.setup_source('path/to/images')
>>> predictor.setup_source('video.mp4')
>>> predictor.setup_source("path/to/images")
>>> predictor.setup_source("video.mp4")
>>> predictor.setup_source(None) # Uses default source if available
Notes:
@ -504,8 +504,8 @@ class Predictor(BasePredictor):
Examples:
>>> predictor = Predictor()
>>> predictor.set_image('path/to/image.jpg')
>>> predictor.set_image(cv2.imread('path/to/image.jpg'))
>>> predictor.set_image("path/to/image.jpg")
>>> predictor.set_image(cv2.imread("path/to/image.jpg"))
Notes:
- This method should be called before performing inference on a new image.

@ -34,15 +34,19 @@ class DETRLoss(nn.Module):
self, nc=80, loss_gain=None, aux_loss=True, use_fl=True, use_vfl=False, use_uni_match=False, uni_match_ind=0
):
"""
DETR loss function.
Initialize DETR loss function with customizable components and gains.
Uses default loss_gain if not provided. Initializes HungarianMatcher with
preset cost gains. Supports auxiliary losses and various loss types.
Args:
nc (int): The number of classes.
loss_gain (dict): The coefficient of loss.
aux_loss (bool): If 'aux_loss = True', loss at each decoder layer are to be used.
use_vfl (bool): Use VarifocalLoss or not.
use_uni_match (bool): Whether to use a fixed layer to assign labels for auxiliary branch.
uni_match_ind (int): The fixed indices of a layer.
nc (int): Number of classes.
loss_gain (dict): Coefficients for different loss components.
aux_loss (bool): Use auxiliary losses from each decoder layer.
use_fl (bool): Use FocalLoss.
use_vfl (bool): Use VarifocalLoss.
use_uni_match (bool): Use fixed layer for auxiliary branch label assignment.
uni_match_ind (int): Index of fixed layer for uni_match.
"""
super().__init__()
@ -82,9 +86,7 @@ class DETRLoss(nn.Module):
return {name_class: loss_cls.squeeze() * self.loss_gain["class"]}
def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=""):
"""Calculates and returns the bounding box loss and GIoU loss for the predicted and ground truth bounding
boxes.
"""
"""Computes bounding box and GIoU losses for predicted and ground truth bounding boxes."""
# Boxes: [b, query, 4], gt_bbox: list[[n, 4]]
name_bbox = f"loss_bbox{postfix}"
name_giou = f"loss_giou{postfix}"
@ -250,14 +252,24 @@ class DETRLoss(nn.Module):
def forward(self, pred_bboxes, pred_scores, batch, postfix="", **kwargs):
"""
Calculate loss for predicted bounding boxes and scores.
Args:
pred_bboxes (torch.Tensor): [l, b, query, 4]
pred_scores (torch.Tensor): [l, b, query, num_classes]
batch (dict): A dict includes:
gt_cls (torch.Tensor) with shape [num_gts, ],
gt_bboxes (torch.Tensor): [num_gts, 4],
gt_groups (List(int)): a list of batch size length includes the number of gts of each image.
postfix (str): postfix of loss name.
pred_bboxes (torch.Tensor): Predicted bounding boxes, shape [l, b, query, 4].
pred_scores (torch.Tensor): Predicted class scores, shape [l, b, query, num_classes].
batch (dict): Batch information containing:
cls (torch.Tensor): Ground truth classes, shape [num_gts].
bboxes (torch.Tensor): Ground truth bounding boxes, shape [num_gts, 4].
gt_groups (List[int]): Number of ground truths for each image in the batch.
postfix (str): Postfix for loss names.
**kwargs (Any): Additional arguments, may include 'match_indices'.
Returns:
(dict): Computed losses, including main and auxiliary (if enabled).
Note:
Uses last elements of pred_bboxes and pred_scores for main loss, and the rest for auxiliary losses if
self.aux_loss is True.
"""
self.device = pred_bboxes.device
match_indices = kwargs.get("match_indices", None)

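The revised `forward` docstring pins down the batch dict keys and prediction shapes. A hedged sketch constructing matching inputs for a two-image batch; values are random, and the loss call is left commented since it depends on matcher internals:

```python
import torch

# Two-image batch: image 0 has 2 ground truths, image 1 has 1 (num_gts = 3)
batch = {
    "cls": torch.tensor([0, 2, 1]),  # [num_gts] class indices
    "bboxes": torch.rand(3, 4),  # [num_gts, 4] normalized boxes
    "gt_groups": [2, 1],  # per-image ground-truth counts
}
# Predictions over l=3 decoder layers: [l, b, query, 4] and [l, b, query, nc]
pred_bboxes = torch.rand(3, 2, 100, 4)
pred_scores = torch.rand(3, 2, 100, 80)
# loss = DETRLoss(nc=80)(pred_bboxes, pred_scores, batch)  # shapes per the docstring
```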
@ -32,9 +32,7 @@ class HungarianMatcher(nn.Module):
"""
def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0):
"""Initializes HungarianMatcher with cost coefficients, Focal Loss, mask prediction, sample points, and alpha
gamma factors.
"""
"""Initializes a HungarianMatcher module for optimal assignment of predicted and ground truth bounding boxes."""
super().__init__()
if cost_gain is None:
cost_gain = {"class": 1, "bbox": 5, "giou": 2, "mask": 1, "dice": 1}
@ -70,7 +68,6 @@ class HungarianMatcher(nn.Module):
For each batch element, it holds:
len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
"""
bs, nq, nc = pred_scores.shape
if sum(gt_groups) == 0:
@ -175,7 +172,6 @@ def get_cdn_group(
bounding boxes, attention mask and meta information for denoising. If not in training mode or 'num_dn'
is less than or equal to 0, the function returns None for all elements in the tuple.
"""
if (not training) or num_dn <= 0:
return None, None, None, None
gt_groups = batch["gt_groups"]

@ -21,7 +21,7 @@ class ClassificationPredictor(BasePredictor):
from ultralytics.utils import ASSETS
from ultralytics.models.yolo.classify import ClassificationPredictor
args = dict(model='yolov8n-cls.pt', source=ASSETS)
args = dict(model="yolov8n-cls.pt", source=ASSETS)
predictor = ClassificationPredictor(overrides=args)
predictor.predict_cli()
```

@ -22,7 +22,7 @@ class ClassificationTrainer(BaseTrainer):
```python
from ultralytics.models.yolo.classify import ClassificationTrainer
args = dict(model='yolov8n-cls.pt', data='imagenet10', epochs=3)
args = dict(model="yolov8n-cls.pt", data="imagenet10", epochs=3)
trainer = ClassificationTrainer(overrides=args)
trainer.train()
```

@ -20,7 +20,7 @@ class ClassificationValidator(BaseValidator):
```python
from ultralytics.models.yolo.classify import ClassificationValidator
args = dict(model='yolov8n-cls.pt', data='imagenet10')
args = dict(model="yolov8n-cls.pt", data="imagenet10")
validator = ClassificationValidator(args=args)
validator()
```

@ -14,7 +14,7 @@ class DetectionPredictor(BasePredictor):
from ultralytics.utils import ASSETS
from ultralytics.models.yolo.detect import DetectionPredictor
args = dict(model='yolov8n.pt', source=ASSETS)
args = dict(model="yolov8n.pt", source=ASSETS)
predictor = DetectionPredictor(overrides=args)
predictor.predict_cli()
```

@ -24,7 +24,7 @@ class DetectionTrainer(BaseTrainer):
```python
from ultralytics.models.yolo.detect import DetectionTrainer
args = dict(model='yolov8n.pt', data='coco8.yaml', epochs=3)
args = dict(model="yolov8n.pt", data="coco8.yaml", epochs=3)
trainer = DetectionTrainer(overrides=args)
trainer.train()
```

@ -22,7 +22,7 @@ class DetectionValidator(BaseValidator):
```python
from ultralytics.models.yolo.detect import DetectionValidator
args = dict(model='yolov8n.pt', data='coco8.yaml')
args = dict(model="yolov8n.pt", data="coco8.yaml")
validator = DetectionValidator(args=args)
validator()
```

@ -64,10 +64,14 @@ class YOLOWorld(Model):
def __init__(self, model="yolov8s-world.pt", verbose=False) -> None:
"""
Initializes the YOLOv8-World model with the given pre-trained model file. Supports *.pt and *.yaml formats.
Initialize YOLOv8-World model with a pre-trained model file.
Loads a YOLOv8-World model for object detection. If no custom class names are provided, it assigns default
COCO class names.
Args:
model (str | Path): Path to the pre-trained model. Defaults to 'yolov8s-world.pt'.
model (str | Path): Path to the pre-trained model file. Supports *.pt and *.yaml formats.
verbose (bool): If True, prints additional information during initialization.
"""
super().__init__(model=model, task="detect", verbose=verbose)

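To complement the rewritten `YOLOWorld.__init__` docstring, a short usage sketch; it assumes the checkpoint is available locally and uses the model's `set_classes` method to replace the default COCO vocabulary:

```python
from ultralytics import YOLOWorld

model = YOLOWorld("yolov8s-world.pt")  # loads with default COCO class names
model.set_classes(["person", "backpack"])  # swap in a custom vocabulary
results = model.predict("ultralytics/assets/bus.jpg")
results[0].show()
```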
@ -16,7 +16,7 @@ class OBBPredictor(DetectionPredictor):
from ultralytics.utils import ASSETS
from ultralytics.models.yolo.obb import OBBPredictor
args = dict(model='yolov8n-obb.pt', source=ASSETS)
args = dict(model="yolov8n-obb.pt", source=ASSETS)
predictor = OBBPredictor(overrides=args)
predictor.predict_cli()
```

@ -15,7 +15,7 @@ class OBBTrainer(yolo.detect.DetectionTrainer):
```python
from ultralytics.models.yolo.obb import OBBTrainer
args = dict(model='yolov8n-obb.pt', data='dota8.yaml', epochs=3)
args = dict(model="yolov8n-obb.pt", data="dota8.yaml", epochs=3)
trainer = OBBTrainer(overrides=args)
trainer.train()
```

@ -18,9 +18,9 @@ class OBBValidator(DetectionValidator):
```python
from ultralytics.models.yolo.obb import OBBValidator
args = dict(model='yolov8n-obb.pt', data='dota8.yaml')
args = dict(model="yolov8n-obb.pt", data="dota8.yaml")
validator = OBBValidator(args=args)
validator(model=args['model'])
validator(model=args["model"])
```
"""

@ -14,7 +14,7 @@ class PosePredictor(DetectionPredictor):
from ultralytics.utils import ASSETS
from ultralytics.models.yolo.pose import PosePredictor
args = dict(model='yolov8n-pose.pt', source=ASSETS)
args = dict(model="yolov8n-pose.pt", source=ASSETS)
predictor = PosePredictor(overrides=args)
predictor.predict_cli()
```

@ -16,7 +16,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
```python
from ultralytics.models.yolo.pose import PoseTrainer
args = dict(model='yolov8n-pose.pt', data='coco8-pose.yaml', epochs=3)
args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml", epochs=3)
trainer = PoseTrainer(overrides=args)
trainer.train()
```

@ -20,7 +20,7 @@ class PoseValidator(DetectionValidator):
```python
from ultralytics.models.yolo.pose import PoseValidator
args = dict(model='yolov8n-pose.pt', data='coco8-pose.yaml')
args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml")
validator = PoseValidator(args=args)
validator()
```

@ -14,7 +14,7 @@ class SegmentationPredictor(DetectionPredictor):
from ultralytics.utils import ASSETS
from ultralytics.models.yolo.segment import SegmentationPredictor
args = dict(model='yolov8n-seg.pt', source=ASSETS)
args = dict(model="yolov8n-seg.pt", source=ASSETS)
predictor = SegmentationPredictor(overrides=args)
predictor.predict_cli()
```

@ -16,7 +16,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
```python
from ultralytics.models.yolo.segment import SegmentationTrainer
args = dict(model='yolov8n-seg.pt', data='coco8-seg.yaml', epochs=3)
args = dict(model="yolov8n-seg.pt", data="coco8-seg.yaml", epochs=3)
trainer = SegmentationTrainer(overrides=args)
trainer.train()
```

@ -22,7 +22,7 @@ class SegmentationValidator(DetectionValidator):
```python
from ultralytics.models.yolo.segment import SegmentationValidator
args = dict(model='yolov8n-seg.pt', data='coco8-seg.yaml')
args = dict(model="yolov8n-seg.pt", data="coco8-seg.yaml")
validator = SegmentationValidator(args=args)
validator()
```

@ -29,7 +29,7 @@ class WorldTrainer(yolo.detect.DetectionTrainer):
```python
from ultralytics.models.yolo.world import WorldModel
args = dict(model='yolov8s-world.pt', data='coco8.yaml', epochs=3)
args = dict(model="yolov8s-world.pt", data="coco8.yaml", epochs=3)
trainer = WorldTrainer(overrides=args)
trainer.train()
```

@ -641,8 +641,8 @@ class AutoBackend(nn.Module):
@staticmethod
def _model_type(p="path/to/model.pt"):
"""
This function takes a path to a model file and returns the model type. Possibles types are pt, jit, onnx, xml,
engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, ncnn or paddle.
Takes a path to a model file and returns the model type. Possible types are pt, jit, onnx, xml, engine, coreml,
saved_model, pb, tflite, edgetpu, tfjs, ncnn or paddle.
Args:
p: path to the model file. Defaults to path/to/model.pt

@ -11,9 +11,9 @@ Example:
x = torch.ones(1, 128, 40, 40)
m = Conv(128, 128)
f = f'{m._get_name()}.onnx'
f = f"{m._get_name()}.onnx"
torch.onnx.export(m, x, f)
os.system(f'onnxslim {f} {f} && open {f}') # pip install onnxslim
os.system(f"onnxslim {f} {f} && open {f}") # pip install onnxslim
```
"""

@ -204,9 +204,7 @@ class C2(nn.Module):
"""CSP Bottleneck with 2 convolutions."""
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes the CSP Bottleneck with 2 convolutions module with arguments ch_in, ch_out, number, shortcut,
groups, expansion.
"""
"""Initializes a CSP Bottleneck with 2 convolutions and optional shortcut connection."""
super().__init__()
self.c = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, 2 * self.c, 1, 1)
@ -224,9 +222,7 @@ class C2f(nn.Module):
"""Faster Implementation of CSP Bottleneck with 2 convolutions."""
def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
"""Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups,
expansion.
"""
"""Initializes a CSP bottleneck with 2 convolutions and n Bottleneck blocks for faster processing."""
super().__init__()
self.c = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, 2 * self.c, 1, 1)
@ -335,9 +331,7 @@ class Bottleneck(nn.Module):
"""Standard bottleneck."""
def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
"""Initializes a bottleneck module with given input/output channels, shortcut option, group, kernels, and
expansion.
"""
"""Initializes a standard bottleneck module with optional shortcut connection and configurable parameters."""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, k[0], 1)
@ -345,7 +339,7 @@ class Bottleneck(nn.Module):
self.add = shortcut and c1 == c2
def forward(self, x):
"""'forward()' applies the YOLO FPN to input data."""
"""Applies the YOLO FPN to input data."""
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
@ -449,9 +443,7 @@ class C2fAttn(nn.Module):
"""C2f module with an additional attn module."""
def __init__(self, c1, c2, n=1, ec=128, nh=1, gc=512, shortcut=False, g=1, e=0.5):
"""Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups,
expansion.
"""
"""Initializes C2f module with attention mechanism for enhanced feature extraction and processing."""
super().__init__()
self.c = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, 2 * self.c, 1, 1)
@ -521,9 +513,7 @@ class ImagePoolingAttn(nn.Module):
class ContrastiveHead(nn.Module):
"""Contrastive Head for YOLO-World compute the region-text scores according to the similarity between image and text
features.
"""
"""Implements contrastive learning head for region-text similarity in vision-language models."""
def __init__(self):
"""Initializes ContrastiveHead with specified region-text similarity parameters."""
@ -569,16 +559,14 @@ class RepBottleneck(Bottleneck):
"""Rep bottleneck."""
def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
"""Initializes a RepBottleneck module with customizable in/out channels, shortcut option, groups and expansion
ratio.
"""
"""Initializes a RepBottleneck module with customizable in/out channels, shortcuts, groups and expansion."""
super().__init__(c1, c2, shortcut, g, k, e)
c_ = int(c2 * e) # hidden channels
self.cv1 = RepConv(c1, c_, k[0], 1)
class RepCSP(C3):
"""Rep CSP Bottleneck with 3 convolutions."""
"""Repeatable Cross Stage Partial Network (RepCSP) module for efficient feature extraction."""
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes RepCSP layer with given channels, repetitions, shortcut, groups and expansion ratio."""

@ -158,9 +158,7 @@ class GhostConv(nn.Module):
"""Ghost Convolution https://github.com/huawei-noah/ghostnet."""
def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
"""Initializes the GhostConv object with input channels, output channels, kernel size, stride, groups and
activation.
"""
"""Initializes Ghost Convolution module with primary and cheap operations for efficient feature learning."""
super().__init__()
c_ = c2 // 2 # hidden channels
self.cv1 = Conv(c1, c_, k, s, None, g, act=act)

@ -8,6 +8,7 @@ import torch
import torch.nn as nn
from torch.nn.init import constant_, xavier_uniform_
from ultralytics.utils import MACOS
from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
from .block import DFL, BNContrastiveHead, ContrastiveHead, Proto
@ -151,13 +152,16 @@ class Detect(nn.Module):
boxes = torch.gather(boxes, dim=1, index=index.repeat(1, 1, boxes.shape[-1]))
scores = torch.gather(scores, dim=1, index=index.repeat(1, 1, scores.shape[-1]))
# NOTE: simplify but result slightly lower mAP
# NOTE: simplified version below, but results in slightly lower mAP
# scores, labels = scores.max(dim=-1)
# return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
scores, index = torch.topk(scores.flatten(1), max_det, axis=-1)
labels = index % nc
index = index // nc
# Set int64 dtype for MPS and CoreML compatibility to avoid 'gather_along_axis' ops error
if MACOS:
index = index.to(torch.int64)
boxes = boxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes.shape[-1]))
return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1).to(boxes.dtype)], dim=-1)
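The hunk above casts gather indices to int64 on macOS to avoid the MPS/CoreML `gather_along_axis` error. A small torch sketch of the same topk-then-gather pattern with toy shapes:

```python
import torch

boxes = torch.rand(1, 8400, 4)  # [batch, anchors, xyxy]
scores = torch.rand(1, 8400, 80)  # [batch, anchors, classes]
nc, max_det = 80, 300

flat_scores, index = torch.topk(scores.flatten(1), max_det, dim=-1)
labels = index % nc  # class id within each anchor
index = (index // nc).to(torch.int64)  # anchor id; int64 keeps gather happy on MPS
top_boxes = boxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, 4))
print(top_boxes.shape)  # torch.Size([1, 300, 4])
```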
@ -262,9 +266,7 @@ class Classify(nn.Module):
"""YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
"""Initializes YOLOv8 classification head with specified input and output channels, kernel size, stride,
padding, and groups.
"""
"""Initializes YOLOv8 classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape."""
super().__init__()
c_ = 1280 # efficientnet_b0 size
self.conv = Conv(c1, c_, k, s, p, g)
@ -567,7 +569,7 @@ class RTDETRDecoder(nn.Module):
class v10Detect(Detect):
"""
v10 Detection head from https://arxiv.org/pdf/2405.14458
v10 Detection head from https://arxiv.org/pdf/2405.14458.
Args:
nc (int): Number of classes.

Some files were not shown because too many files have changed in this diff.