diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 063121371..5f02211de 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -52,9 +52,9 @@ body: - type: textarea attributes: label: Environment - description: Many issues are often related to dependency versions and hardware. Please provide the output of `yolo checks` or `ultralytics.checks()` command to help us diagnose the problem. + description: Many issues are often related to dependency versions and hardware. Please provide the output of `yolo checks` (CLI) or `ultralytics.utils.checks.collect_system_info()` (Python) command to help us diagnose the problem. placeholder: | - Paste output of `yolo checks` or `ultralytics.checks()` command, i.e.: + Paste output of `yolo checks` (CLI) or `ultralytics.utils.checks.collect_system_info()` (Python) command, i.e.: ``` Ultralytics 8.3.2 🚀 Python-3.11.2 torch-2.4.1 CPU (Apple M3) Setup complete ✅ (8 CPUs, 16.0 GB RAM, 266.5/460.4 GB disk) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9b1c5364a..3a65188e4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -52,16 +52,15 @@ jobs: - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: "pip" # caching pip dependencies + - uses: astral-sh/setup-uv@v4 - name: Install requirements shell: bash # for Windows compatibility run: | - python -m pip install --upgrade pip wheel - pip install . --extra-index-url https://download.pytorch.org/whl/cpu + uv pip install --system . --extra-index-url https://download.pytorch.org/whl/cpu - name: Check environment run: | yolo checks - pip list + uv pip list - name: Test HUB training shell: python env: @@ -111,6 +110,7 @@ jobs: - name: Install requirements shell: bash # for Windows compatibility run: | + # Warnings: uv causes numpy errors during benchmarking python -m pip install --upgrade pip wheel pip install -e ".[export]" "coverage[toml]" --extra-index-url https://download.pytorch.org/whl/cpu - name: Check environment @@ -143,7 +143,7 @@ jobs: coverage xml -o coverage-benchmarks.xml - name: Upload Coverage Reports to CodeCov if: github.repository == 'ultralytics/ultralytics' - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: flags: Benchmarks env: @@ -172,12 +172,11 @@ jobs: - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: "pip" # caching pip dependencies + - uses: astral-sh/setup-uv@v4 - name: Install requirements shell: bash # for Windows compatibility run: | # CoreML must be installed before export due to protobuf error from AutoInstall - python -m pip install --upgrade pip wheel slow="" torch="" if [ "${{ matrix.torch }}" == "1.8.0" ]; then @@ -186,11 +185,11 @@ jobs: if [[ "${{ github.event_name }}" =~ ^(schedule|workflow_dispatch)$ ]]; then slow="pycocotools mlflow" fi - pip install -e ".[export]" $torch $slow pytest-cov --extra-index-url https://download.pytorch.org/whl/cpu + uv pip install --system -e ".[export]" $torch $slow pytest-cov --extra-index-url https://download.pytorch.org/whl/cpu - name: Check environment run: | yolo checks - pip list + uv pip list - name: Pytest tests shell: bash # for Windows compatibility run: | @@ -201,7 +200,7 @@ jobs: pytest $slow --cov=ultralytics/ --cov-report xml tests/ - name: Upload Coverage Reports to CodeCov if: github.repository == 'ultralytics/ultralytics' # && matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11' - 
uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: flags: Tests env: @@ -213,12 +212,13 @@ jobs: runs-on: gpu-latest steps: - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v4 - name: Install requirements - run: pip install . pytest-cov + run: uv pip install --system . pytest-cov - name: Check environment run: | yolo checks - pip list + uv pip list - name: Pytest tests run: | slow="" @@ -227,7 +227,7 @@ jobs: fi pytest $slow --cov=ultralytics/ --cov-report xml tests/test_cuda.py - name: Upload Coverage Reports to CodeCov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: flags: GPU env: @@ -294,13 +294,8 @@ jobs: channels: conda-forge,defaults channel-priority: true activate-environment: anaconda-client-env - - name: Cleanup toolcache - run: | - echo "Free space before deletion:" - df -h / - rm -rf /opt/hostedtoolcache - echo "Free space after deletion:" - df -h / + - name: Cleanup disk space + uses: ultralytics/actions/cleanup-disk@main - name: Install Linux packages run: | # Fix cv2 ImportError: 'libEGL.so.1: cannot open shared object file: No such file or directory' @@ -348,14 +343,14 @@ jobs: Summary: runs-on: ubuntu-latest - needs: [HUB, Benchmarks, Tests, GPU, RaspberryPi, Conda] # Add job names that you want to check for failure - if: always() # This ensures the job runs even if previous jobs fail + needs: [HUB, Benchmarks, Tests, GPU, RaspberryPi, Conda] + if: always() steps: - name: Check for failure and notify if: (needs.HUB.result == 'failure' || needs.Benchmarks.result == 'failure' || needs.Tests.result == 'failure' || needs.GPU.result == 'failure' || needs.RaspberryPi.result == 'failure' || needs.Conda.result == 'failure' ) && github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push') && github.run_attempt == '1' - uses: slackapi/slack-github-action@v1.27.0 + uses: slackapi/slack-github-action@v2.0.0 with: + webhook-type: incoming-webhook + webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} payload: | - {"text": " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"} - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} + text: " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n" diff --git a/.github/workflows/codeql.yaml b/.github/workflows/codeql.yaml deleted file mode 100644 index e6e3e85d3..000000000 --- a/.github/workflows/codeql.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license - -name: "CodeQL" - -on: - schedule: - - cron: "0 0 1 * *" - workflow_dispatch: - -jobs: - analyze: - name: Analyze - runs-on: ${{ 'ubuntu-latest' }} - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: ["python"] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - # Initializes the CodeQL tools for scanning. 
- - name: Initialize CodeQL - uses: github/codeql-action/init@v3 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # queries: security-extended,security-and-quality - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 - with: - category: "/language:${{matrix.language}}" diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 38f30bb1b..26846b0b4 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -134,12 +134,12 @@ jobs: - name: Build Image if: github.event_name == 'push' || github.event.inputs[matrix.dockerfile] == 'true' - uses: nick-invision/retry@v3 + uses: ultralytics/actions/retry@main with: timeout_minutes: 120 - retry_wait_seconds: 60 - max_attempts: 3 # retry twice - command: | + retry_delay_seconds: 60 + retries: 2 + run: | docker build \ --platform ${{ matrix.platforms }} \ -f docker/${{ matrix.dockerfile }} \ @@ -172,7 +172,7 @@ jobs: fi if [[ "${{ matrix.tags }}" == "latest-python" ]]; then t=ultralytics/ultralytics:latest-jupyter - v=ultralytics/ultralytics:${{ steps.get_version.outputs.version_tag }}-jupyter + v=ultralytics/ultralytics:${{ steps.get_version.outputs.version }}-jupyter docker build -f docker/Dockerfile-jupyter -t $t -t $v . docker push $t if [[ "${{ steps.check_tag.outputs.new_release }}" == "true" ]]; then @@ -202,9 +202,9 @@ jobs: steps: - name: Check for failure and notify if: needs.docker.result == 'failure' && github.repository == 'ultralytics/ultralytics' && github.event_name == 'push' && github.run_attempt == '1' - uses: slackapi/slack-github-action@v1.27.0 + uses: slackapi/slack-github-action@v2.0.0 with: + webhook-type: incoming-webhook + webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} payload: | - {"text": " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"} - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} + text: " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n" diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index ea6def886..5b0c7a96d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -29,7 +29,7 @@ on: jobs: Docs: if: github.repository == 'ultralytics/ultralytics' - runs-on: macos-14 + runs-on: ubuntu-latest steps: - name: Git config run: | @@ -46,9 +46,9 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.x" - cache: "pip" # caching pip dependencies + - uses: astral-sh/setup-uv@v4 - name: Install Dependencies - run: pip install ruff black tqdm minify-html mkdocs-material "mkdocstrings[python]" mkdocs-jupyter mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin + run: uv pip install --system ruff black tqdm mkdocs-material "mkdocstrings[python]" mkdocs-jupyter mkdocs-redirects mkdocs-ultralytics-plugin mkdocs-macros-plugin - name: Ruff fixes continue-on-error: true run: ruff check --fix --unsafe-fixes --select 
D --ignore=D100,D104,D203,D205,D212,D213,D401,D406,D407,D413 . diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index acd286565..28720abfb 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -15,7 +15,7 @@ on: jobs: format: - runs-on: macos-14 + runs-on: ubuntu-latest steps: - name: Run Ultralytics Formatting uses: ultralytics/actions@main diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml index 4dd8aa38b..b66a7d507 100644 --- a/.github/workflows/links.yml +++ b/.github/workflows/links.yml @@ -29,12 +29,12 @@ jobs: sudo mv lychee /usr/local/bin - name: Test Markdown and HTML links with retry - uses: nick-invision/retry@v3 + uses: ultralytics/actions/retry@main with: - timeout_minutes: 5 - retry_wait_seconds: 60 - max_attempts: 3 - command: | + timeout_minutes: 60 + retry_delay_seconds: 900 + retries: 2 + run: | lychee \ --scheme https \ --timeout 60 \ @@ -59,12 +59,12 @@ jobs: - name: Test Markdown, HTML, YAML, Python and Notebook links with retry if: github.event_name == 'workflow_dispatch' - uses: nick-invision/retry@v3 + uses: ultralytics/actions/retry@main with: - timeout_minutes: 5 - retry_wait_seconds: 60 - max_attempts: 3 - command: | + timeout_minutes: 60 + retry_delay_seconds: 900 + retries: 2 + run: | lychee \ --scheme https \ --timeout 60 \ diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index bccc28332..b1dd1e435 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -17,7 +17,7 @@ jobs: if: github.repository == 'ultralytics/ultralytics' && github.actor == 'glenn-jocher' name: Publish runs-on: ubuntu-latest - environment: # for GitHub Deployments tab + environment: # for GitHub Deployments tab name: Release - PyPI url: https://pypi.org/p/ultralytics permissions: @@ -90,19 +90,20 @@ jobs: fi echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV echo "PR_TITLE=$PR_TITLE" >> $GITHUB_ENV + - name: Notify on Slack (Success) if: success() && github.event_name == 'push' && steps.check_pypi.outputs.increment == 'True' - uses: slackapi/slack-github-action@v1.27.0 + uses: slackapi/slack-github-action@v2.0.0 with: + webhook-type: incoming-webhook + webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} payload: | - {"text": " GitHub Actions success for ${{ github.workflow }} ✅\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW `${{ github.repository }} ${{ steps.check_pypi.outputs.current_tag }}` pip package published 😃\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n"} - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} + text: " GitHub Actions success for ${{ github.workflow }} ✅\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW `${{ github.repository }} ${{ steps.check_pypi.outputs.current_tag }}` pip package published 😃\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n" - name: Notify on Slack (Failure) if: failure() - uses: slackapi/slack-github-action@v1.27.0 + uses: slackapi/slack-github-action@v2.0.0 with: + webhook-type: incoming-webhook + webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} payload: | - {"text": " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository 
}}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n"} - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} + text: " GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n" diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 991e0edd9..cfbe31ae5 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -8,7 +8,7 @@ on: permissions: pull-requests: write issues: write - + jobs: stale: runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index 4e0f0845b..0d4b744d3 100644 --- a/.gitignore +++ b/.gitignore @@ -163,6 +163,7 @@ weights/ *_openvino_model/ *_paddle_model/ *_ncnn_model/ +*_imx_model/ pnnx* # Autogenerated files for tests diff --git a/README.md b/README.md index 01277aff5..61dd03d37 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@
Ultralytics CI - Ultralytics Downloads + Ultralytics Downloads Ultralytics YOLO Citation Ultralytics Discord Ultralytics Forums @@ -27,7 +27,9 @@ We hope that the resources here will help you get the most out of YOLO. Please b To request an Enterprise License please complete the form at [Ultralytics Licensing](https://www.ultralytics.com/license). -YOLO11 performance plots + + YOLO11 performance plots +
Ultralytics GitHub @@ -55,7 +57,7 @@ See below for a quickstart install and usage examples, and see our [Docs](https: Pip install the ultralytics package including all [requirements](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) in a [**Python>=3.8**](https://www.python.org/) environment with [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/). -[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Ultralytics Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/) +[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Ultralytics Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/) ```bash pip install ultralytics @@ -150,8 +152,8 @@ See [Segmentation Docs](https://docs.ultralytics.com/tasks/segment/) for usage e | [YOLO11l-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt) | 640 | 53.4 | 42.9 | 344.2 ± 3.2 | 7.8 ± 0.2 | 27.6 | 142.2 | | [YOLO11x-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt) | 640 | 54.7 | 43.8 | 664.5 ± 3.2 | 15.8 ± 0.7 | 62.1 | 319.0 | -- **mAPval** values are for single-model single-scale on [COCO val2017](https://cocodataset.org/) dataset.
Reproduce by `yolo val segment data=coco-seg.yaml device=0` -- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val segment data=coco-seg.yaml batch=1 device=0|cpu` +- **mAPval** values are for single-model single-scale on [COCO val2017](https://cocodataset.org/) dataset.
Reproduce by `yolo val segment data=coco.yaml device=0` +- **Speed** averaged over COCO val images using an [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) instance.
Reproduce by `yolo val segment data=coco.yaml batch=1 device=0|cpu` diff --git a/README.zh-CN.md b/README.zh-CN.md index caf5e6b47..0ddb98ddb 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -8,7 +8,7 @@
Ultralytics CI - Ultralytics Downloads + Ultralytics Downloads Ultralytics YOLO Citation Ultralytics Discord Ultralytics Forums @@ -27,7 +27,9 @@ 想申请企业许可证,请完成 [Ultralytics Licensing](https://www.ultralytics.com/license) 上的表单。 -YOLO11 performance plots + + YOLO11 performance plots +
Ultralytics GitHub @@ -55,7 +57,7 @@ 在 [**Python>=3.8**](https://www.python.org/) 环境中使用 [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/) 通过 pip 安装包含所有[依赖项](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) 的 ultralytics 包。 -[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Ultralytics Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/) +[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Ultralytics Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/) ```bash pip install ultralytics @@ -150,8 +152,8 @@ YOLO11 [检测](https://docs.ultralytics.com/tasks/detect/)、[分割](https://d | [YOLO11l-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l-seg.pt) | 640 | 53.4 | 42.9 | 344.2 ± 3.2 | 7.8 ± 0.2 | 27.6 | 142.2 | | [YOLO11x-seg](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x-seg.pt) | 640 | 54.7 | 43.8 | 664.5 ± 3.2 | 15.8 ± 0.7 | 62.1 | 319.0 | -- **mAPval** 值针对单模型单尺度在 [COCO val2017](https://cocodataset.org/) 数据集上进行。
复制命令 `yolo val segment data=coco-seg.yaml device=0` -- **速度**在使用 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 实例的 COCO 验证图像上平均。
复制命令 `yolo val segment data=coco-seg.yaml batch=1 device=0|cpu` +- **mAPval** 值针对单模型单尺度在 [COCO val2017](https://cocodataset.org/) 数据集上进行。
复制命令 `yolo val segment data=coco.yaml device=0` +- **速度**在使用 [Amazon EC2 P4d](https://aws.amazon.com/ec2/instance-types/p4/) 实例的 COCO 验证图像上平均。
复制命令 `yolo val segment data=coco.yaml batch=1 device=0|cpu` diff --git a/docker/Dockerfile b/docker/Dockerfile index 931326f89..a25fbdcce 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -56,7 +56,6 @@ RUN pip install numpy==1.23.5 # Remove extra build files RUN rm -rf tmp /root/.config/Ultralytics/persistent_cache.json - # Usage Examples ------------------------------------------------------------------------------------------------------- # Build and Push diff --git a/docker/Dockerfile-cpu b/docker/Dockerfile-cpu index fe8d88521..ee7dfff1c 100644 --- a/docker/Dockerfile-cpu +++ b/docker/Dockerfile-cpu @@ -2,8 +2,8 @@ # Builds ultralytics/ultralytics:latest-cpu image on DockerHub https://hub.docker.com/r/ultralytics/ultralytics # Image is CPU-optimized for ONNX, OpenVINO and PyTorch YOLO11 deployments -# Start FROM Ubuntu image https://hub.docker.com/_/ubuntu -FROM ubuntu:23.10 +# Use official Python base image for reproducibility (3.11.10 for export and 3.12.6 for inference) +FROM python:3.11.10-slim-bookworm # Set environment variables ENV PYTHONUNBUFFERED=1 \ @@ -39,14 +39,14 @@ RUN pip install -e ".[export]" --extra-index-url https://download.pytorch.org/wh RUN yolo export model=tmp/yolo11n.pt format=edgetpu imgsz=32 RUN yolo export model=tmp/yolo11n.pt format=ncnn imgsz=32 # Requires Python<=3.10, bug with paddlepaddle==2.5.0 https://github.com/PaddlePaddle/X2Paddle/issues/991 -# RUN pip install "paddlepaddle>=2.6.0" x2paddle - -# Creates a symbolic link to make 'python' point to 'python3' -RUN ln -sf /usr/bin/python3 /usr/bin/python +RUN pip install "paddlepaddle>=2.6.0" x2paddle # Remove extra build files RUN rm -rf tmp /root/.config/Ultralytics/persistent_cache.json +# Set default command to bash +CMD ["/bin/bash"] + # Usage Examples ------------------------------------------------------------------------------------------------------- # Build and Push diff --git a/docker/Dockerfile-jupyter b/docker/Dockerfile-jupyter index e42639b9b..0a58bb35f 100644 --- a/docker/Dockerfile-jupyter +++ b/docker/Dockerfile-jupyter @@ -17,7 +17,7 @@ RUN mkdir /data/weights && /usr/local/bin/yolo settings weights_dir="/data/weigh RUN mkdir /data/runs && /usr/local/bin/yolo settings runs_dir="/data/runs" # Start JupyterLab with tutorial notebook -ENTRYPOINT ["/usr/local/bin/jupyter", "lab", "--allow-root", "/ultralytics/examples/tutorial.ipynb"] +ENTRYPOINT ["/usr/local/bin/jupyter", "lab", "--allow-root", "--ip=*", "/ultralytics/examples/tutorial.ipynb"] # Usage Examples ------------------------------------------------------------------------------------------------------- diff --git a/docker/Dockerfile-runner b/docker/Dockerfile-runner index 539f0aa03..514ca53f4 100644 --- a/docker/Dockerfile-runner +++ b/docker/Dockerfile-runner @@ -35,7 +35,6 @@ ENTRYPOINT sh -c './config.sh --url https://github.com/ultralytics/ultralytics \ --replace && \ ./run.sh' - # Usage Examples ------------------------------------------------------------------------------------------------------- # Build and Push diff --git a/docs/README.md b/docs/README.md index a3d3edb40..802352b58 100644 --- a/docs/README.md +++ b/docs/README.md @@ -15,7 +15,7 @@ ## 🛠️ Installation [![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) -[![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) +[![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python 
Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/) To install the ultralytics package in developer mode, ensure you have Git and Python 3 installed on your system. Then, follow these steps: diff --git a/docs/build_docs.py b/docs/build_docs.py index 7bf0575f4..0cf46ed57 100644 --- a/docs/build_docs.py +++ b/docs/build_docs.py @@ -252,7 +252,7 @@ def minify_html_files(): content = f.read() original_size = len(content) - minified_content = minify(content) + minified_content = minify(content, keep_closing_tags=True, minify_css=True, minify_js=True) minified_size = len(minified_content) total_original_size += original_size diff --git a/docs/en/datasets/index.md b/docs/en/datasets/index.md index 5f165b247..38f219242 100644 --- a/docs/en/datasets/index.md +++ b/docs/en/datasets/index.md @@ -74,6 +74,7 @@ Pose estimation is a technique used to determine the pose of the object relative - [COCO8-pose](pose/coco8-pose.md): A smaller dataset for pose estimation tasks, containing a subset of 8 COCO images with human pose annotations. - [Tiger-pose](pose/tiger-pose.md): A compact dataset consisting of 263 images focused on tigers, annotated with 12 keypoints per tiger for pose estimation tasks. - [Hand-Keypoints](pose/hand-keypoints.md): A concise dataset featuring over 26,000 images centered on human hands, annotated with 21 keypoints per hand, designed for pose estimation tasks. +- [Dog-pose](pose/dog-pose.md): A comprehensive dataset featuring approximately 6,000 images focused on dogs, annotated with 24 keypoints per dog, tailored for pose estimation tasks. ## [Classification](classify/index.md) diff --git a/docs/en/datasets/pose/dog-pose.md b/docs/en/datasets/pose/dog-pose.md new file mode 100644 index 000000000..fa6acb075 --- /dev/null +++ b/docs/en/datasets/pose/dog-pose.md @@ -0,0 +1,141 @@ +--- +comments: true +description: Discover the Dog-Pose dataset for pose detection. Featuring 6,773 training and 1,703 test images, it's a robust dataset for training YOLO11 models. +keywords: Dog-Pose, Ultralytics, pose detection dataset, YOLO11, machine learning, computer vision, training data +--- + +# Dog-Pose Dataset + +## Introduction + +The [Ultralytics](https://www.ultralytics.com/) Dog-pose dataset is a high-quality and extensive dataset specifically curated for dog keypoint estimation. With 6,773 training images and 1,703 test images, this dataset provides a solid foundation for training robust pose estimation models. Each annotated image includes 24 keypoints with 3 dimensions per keypoint (x, y, visibility), making it a valuable resource for advanced research and development in computer vision. + +Ultralytics Dog-pose display image + +This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com/) and [YOLO11](https://github.com/ultralytics/ultralytics). + +## Dataset YAML + +A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It includes paths, keypoint details, and other relevant information. In the case of the Dog-pose dataset, The `dog-pose.yaml` is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dog-pose.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dog-pose.yaml). + +!!! 
example "ultralytics/cfg/datasets/dog-pose.yaml" + + ```yaml + --8<-- "ultralytics/cfg/datasets/dog-pose.yaml" + ``` + +## Usage + +To train a YOLO11n-pose model on the Dog-pose dataset for 100 [epochs](https://www.ultralytics.com/glossary/epoch) with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page. + +!!! example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("yolo11n-pose.pt") # load a pretrained model (recommended for training) + + # Train the model + results = model.train(data="dog-pose.yaml", epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Start training from a pretrained *.pt model + yolo pose train data=dog-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 + ``` + +## Sample Images and Annotations + +Here are some examples of images from the Dog-pose dataset, along with their corresponding annotations: + +Dataset sample image + +- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts. + +The example showcases the variety and complexity of the images in the Dog-pose dataset and the benefits of using mosaicing during the training process. + +## Citations and Acknowledgments + +If you use the Dog-pose dataset in your research or development work, please cite the following paper: + +!!! quote "" + + === "BibTeX" + + ```bibtex + @inproceedings{khosla2011fgvc, + title={Novel dataset for Fine-Grained Image Categorization}, + author={Aditya Khosla and Nityananda Jayadevaprakash and Bangpeng Yao and Li Fei-Fei}, + booktitle={First Workshop on Fine-Grained Visual Categorization (FGVC), IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2011} + } + @inproceedings{deng2009imagenet, + title={ImageNet: A Large-Scale Hierarchical Image Database}, + author={Jia Deng and Wei Dong and Richard Socher and Li-Jia Li and Kai Li and Li Fei-Fei}, + booktitle={IEEE Computer Vision and Pattern Recognition (CVPR)}, + year={2009} + } + ``` + +We would like to acknowledge the Stanford team for creating and maintaining this valuable resource for the [computer vision](https://www.ultralytics.com/glossary/computer-vision-cv) community. For more information about the Dog-pose dataset and its creators, visit the [Stanford Dogs Dataset website](http://vision.stanford.edu/aditya86/ImageNetDogs/). + +## FAQ + +### What is the Dog-pose dataset, and how is it used with Ultralytics YOLO11? + +The Dog-Pose dataset features 6,000 images annotated with 17 keypoints for dog pose estimation. Ideal for training and validating models with [Ultralytics YOLO11](https://docs.ultralytics.com/models/yolo11/), it supports applications like animal behavior analysis and veterinary studies. + +### How do I train a YOLO11 model using the Dog-pose dataset in Ultralytics? + +To train a YOLO11n-pose model on the Dog-pose dataset for 100 epochs with an image size of 640, follow these examples: + +!!! 
example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Load a model + model = YOLO("yolo11n-pose.pt") + + # Train the model + results = model.train(data="dog-pose.yaml", epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + yolo pose train data=dog-pose.yaml model=yolo11n-pose.pt epochs=100 imgsz=640 + ``` + +For a comprehensive list of training arguments, refer to the model [Training](../../modes/train.md) page. + +### What are the benefits of using the Dog-pose dataset? + +The Dog-pose dataset offers several benefits: + +**Large and Diverse Dataset**: With 6,000 images, it provides a substantial amount of data covering a wide range of dog poses, breeds, and contexts, enabling robust model training and evaluation. + +**Pose-specific Annotations**: Offers detailed annotations for pose estimation, ensuring high-quality data for training pose detection models. + +**Real-World Scenarios**: Includes images from varied environments, enhancing the model's ability to generalize to real-world applications. + +**Model Performance Improvement**: The diversity and scale of the dataset help improve model accuracy and robustness, particularly for tasks involving fine-grained pose estimation. + +For more about its features and usage, see the [Dataset Introduction](#introduction) section. + +### How does mosaicing benefit the YOLO11 training process using the Dog-pose dataset? + +Mosaicing, as illustrated in the sample images from the Dog-pose dataset, merges multiple images into a single composite, enriching the diversity of objects and scenes in each training batch. This approach enhances the model's capacity to generalize across different object sizes, aspect ratios, and contexts, leading to improved performance. For example images, refer to the [Sample Images and Annotations](#sample-images-and-annotations) section. + +### Where can I find the Dog-pose dataset YAML file and how do I use it? + +The Dog-pose dataset YAML file can be found [here](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dog-pose.yaml). This file defines the dataset configuration, including paths, classes, and other relevant information. Use this file with the YOLO11 training scripts as mentioned in the [Train Example](#how-do-i-train-a-yolo11-model-using-the-dog-pose-dataset-in-ultralytics) section. + +For more FAQs and detailed documentation, visit the [Ultralytics Documentation](https://docs.ultralytics.com/). diff --git a/docs/en/datasets/pose/hand-keypoints.md b/docs/en/datasets/pose/hand-keypoints.md index dd3c19b1a..559cdcec6 100644 --- a/docs/en/datasets/pose/hand-keypoints.md +++ b/docs/en/datasets/pose/hand-keypoints.md @@ -10,6 +10,17 @@ keywords: Hand KeyPoints, pose estimation, dataset, keypoints, MediaPipe, YOLO, The hand-keypoints dataset contains 26,768 images of hands annotated with keypoints, making it suitable for training models like Ultralytics YOLO for pose estimation tasks. The annotations were generated using the Google MediaPipe library, ensuring high [accuracy](https://www.ultralytics.com/glossary/accuracy) and consistency, and the dataset is compatible [Ultralytics YOLO11](https://github.com/ultralytics/ultralytics) formats. +

+ [YouTube video embed] Watch: Hand Keypoints Estimation with Ultralytics YOLO11 | Human Hand Pose Estimation Tutorial
+ ## Hand Landmarks ![Hand Landmarks](https://github.com/ultralytics/docs/releases/download/0/hand_landmarks.jpg) diff --git a/docs/en/datasets/pose/index.md b/docs/en/datasets/pose/index.md index 296b74f83..321bb9c12 100644 --- a/docs/en/datasets/pose/index.md +++ b/docs/en/datasets/pose/index.md @@ -127,6 +127,15 @@ This section outlines the datasets that are compatible with Ultralytics YOLO for - **Usage**: Great for human hand pose estimation. - [Read more about Hand Keypoints](hand-keypoints.md) +### Dog-Pose + +- **Description**: The Dog Pose dataset contains approximately 6,000 images, providing a diverse and extensive resource for training and validation of dog pose estimation models. +- **Label Format**: Follows the Ultralytics YOLO format, with annotations for multiple keypoints specific to dog anatomy. +- **Number of Classes**: 1 (Dog). +- **Keypoints**: Includes 24 keypoints tailored to dog poses, such as limbs, joints, and head positions. +- **Usage**: Ideal for training models to estimate dog poses in various scenarios, from research to real-world applications. +- [Read more about Dog-Pose](dog-pose.md) + ### Adding your own dataset If you have your own dataset and would like to use it for training pose estimation models with Ultralytics YOLO format, ensure that it follows the format specified above under "Ultralytics YOLO format". Convert your annotations to the required format and specify the paths, number of classes, and class names in the YAML configuration file. diff --git a/docs/en/datasets/segment/coco.md b/docs/en/datasets/segment/coco.md index 5ff52f46a..2dd8a0f53 100644 --- a/docs/en/datasets/segment/coco.md +++ b/docs/en/datasets/segment/coco.md @@ -56,14 +56,14 @@ To train a YOLO11n-seg model on the COCO-Seg dataset for 100 [epochs](https://ww model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training) # Train the model - results = model.train(data="coco-seg.yaml", epochs=100, imgsz=640) + results = model.train(data="coco.yaml", epochs=100, imgsz=640) ``` === "CLI" ```bash # Start training from a pretrained *.pt model - yolo segment train data=coco-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 + yolo segment train data=coco.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -118,14 +118,14 @@ To train a YOLO11n-seg model on the COCO-Seg dataset for 100 epochs with an imag model = YOLO("yolo11n-seg.pt") # load a pretrained model (recommended for training) # Train the model - results = model.train(data="coco-seg.yaml", epochs=100, imgsz=640) + results = model.train(data="coco.yaml", epochs=100, imgsz=640) ``` === "CLI" ```bash # Start training from a pretrained *.pt model - yolo segment train data=coco-seg.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 + yolo segment train data=coco.yaml model=yolo11n-seg.pt epochs=100 imgsz=640 ``` ### What are the key features of the COCO-Seg dataset? 
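The COCO-Seg hunks above swap `coco-seg.yaml` for `coco.yaml` in the training examples, matching the updated README footnotes (`yolo val segment data=coco.yaml`). For reference, here is a minimal hedged sketch of the equivalent Python validation call; `device=0` assumes a CUDA GPU is available, and the checkpoint and dataset are assumed to download automatically on first use:

```python
from ultralytics import YOLO

# Load the pretrained segmentation checkpoint referenced in the benchmark tables
model = YOLO("yolo11n-seg.pt")

# Validate on COCO-Seg using the renamed dataset config from this PR
metrics = model.val(data="coco.yaml", imgsz=640, batch=1, device=0)

print(metrics.box.map)  # box mAP50-95
print(metrics.seg.map)  # mask mAP50-95
```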
diff --git a/docs/en/guides/analytics.md b/docs/en/guides/analytics.md index dec9b4cce..cd7fc40dc 100644 --- a/docs/en/guides/analytics.md +++ b/docs/en/guides/analytics.md @@ -45,126 +45,15 @@ This guide provides a comprehensive overview of three fundamental types of [data # generate the pie chart yolo solutions analytics analytics_type="pie" show=True - ``` - - === "Python" - - ```python - import cv2 - - from ultralytics import solutions - - cap = cv2.VideoCapture("Path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - out = cv2.VideoWriter( - "ultralytics_analytics.avi", - cv2.VideoWriter_fourcc(*"MJPG"), - fps, - (1920, 1080), # This is fixed - ) - analytics = solutions.Analytics( - analytics_type="line", - show=True, - ) + # generate the bar plots + yolo solutions analytics analytics_type="bar" show=True - frame_count = 0 - while cap.isOpened(): - success, im0 = cap.read() - if success: - frame_count += 1 - im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame - out.write(im0) # write the video file - else: - break - - cap.release() - out.release() - cv2.destroyAllWindows() + # generate the area plots + yolo solutions analytics analytics_type="area" show=True ``` - === "Pie Chart" - - ```python - import cv2 - - from ultralytics import solutions - - cap = cv2.VideoCapture("Path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - out = cv2.VideoWriter( - "ultralytics_analytics.avi", - cv2.VideoWriter_fourcc(*"MJPG"), - fps, - (1920, 1080), # This is fixed - ) - - analytics = solutions.Analytics( - analytics_type="pie", - show=True, - ) - - frame_count = 0 - while cap.isOpened(): - success, im0 = cap.read() - if success: - frame_count += 1 - im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame - out.write(im0) # write the video file - else: - break - - cap.release() - out.release() - cv2.destroyAllWindows() - ``` - - === "Bar Plot" - - ```python - import cv2 - - from ultralytics import solutions - - cap = cv2.VideoCapture("Path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - out = cv2.VideoWriter( - "ultralytics_analytics.avi", - cv2.VideoWriter_fourcc(*"MJPG"), - fps, - (1920, 1080), # This is fixed - ) - - analytics = solutions.Analytics( - analytics_type="bar", - show=True, - ) - - frame_count = 0 - while cap.isOpened(): - success, im0 = cap.read() - if success: - frame_count += 1 - im0 = analytics.process_data(im0, frame_count) # update analytics graph every frame - out.write(im0) # write the video file - else: - break - - cap.release() - out.release() - cv2.destroyAllWindows() - ``` - - === "Area chart" + === "Python" ```python import cv2 @@ -173,9 +62,9 @@ This guide provides a comprehensive overview of three fundamental types of [data cap = cv2.VideoCapture("Path/to/video/file.mp4") assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) + # Video writer out = cv2.VideoWriter( "ultralytics_analytics.avi", cv2.VideoWriter_fourcc(*"MJPG"), @@ -183,11 +72,15 @@ This guide provides a comprehensive 
overview of three fundamental types of [data (1920, 1080), # This is fixed ) + # Init analytics analytics = solutions.Analytics( - analytics_type="area", - show=True, + show=True, # Display the output + analytics_type="line", # Pass the analytics type, could be "pie", "bar" or "area". + model="yolo11n.pt", # Path to the YOLO11 model file + # classes=[0, 2], # If you want to count specific classes i.e person and car with COCO pretrained model. ) + # Process video frame_count = 0 while cap.isOpened(): success, im0 = cap.read() diff --git a/docs/en/guides/distance-calculation.md b/docs/en/guides/distance-calculation.md index 009899ae3..c9775124d 100644 --- a/docs/en/guides/distance-calculation.md +++ b/docs/en/guides/distance-calculation.md @@ -55,6 +55,7 @@ Measuring the gap between two objects is known as distance calculation within a # Init distance-calculation obj distance = solutions.DistanceCalculation(model="yolo11n.pt", show=True) + # Process video while cap.isOpened(): success, im0 = cap.read() if not success: diff --git a/docs/en/guides/heatmaps.md b/docs/en/guides/heatmaps.md index 66c26eaa0..8bc86b69e 100644 --- a/docs/en/guides/heatmaps.md +++ b/docs/en/guides/heatmaps.md @@ -47,119 +47,12 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult # Pass a custom colormap yolo solutions heatmap colormap=cv2.COLORMAP_INFERNO - ``` - - === "Python" - - ```python - import cv2 - - from ultralytics import solutions - - cap = cv2.VideoCapture("Path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - # Video writer - video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - - # Init heatmap - heatmap = solutions.Heatmap( - show=True, - model="yolo11n.pt", - colormap=cv2.COLORMAP_PARULA, - ) - - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - im0 = heatmap.generate_heatmap(im0) - video_writer.write(im0) - - cap.release() - video_writer.release() - cv2.destroyAllWindows() - ``` - - === "Line Counting" - - ```python - import cv2 - - from ultralytics import solutions - - cap = cv2.VideoCapture("Path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - # Video writer - video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - # line for object counting - line_points = [(20, 400), (1080, 404)] - - # Init heatmap - heatmap = solutions.Heatmap( - show=True, - model="yolo11n.pt", - colormap=cv2.COLORMAP_PARULA, - region=line_points, - ) - - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - im0 = heatmap.generate_heatmap(im0) - video_writer.write(im0) - - cap.release() - video_writer.release() - cv2.destroyAllWindows() + # Heatmaps + object counting + yolo solutions heatmap region=[(20, 400), (1080, 400), (1080, 360), (20, 360)] ``` - === "Polygon Counting" - - ```python - import cv2 - - from ultralytics import solutions - - cap = cv2.VideoCapture("Path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, 
cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - # Video writer - video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - - # Define polygon points - region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360), (20, 400)] - - # Init heatmap - heatmap = solutions.Heatmap( - show=True, - model="yolo11n.pt", - colormap=cv2.COLORMAP_PARULA, - region=region_points, - ) - - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - im0 = heatmap.generate_heatmap(im0) - video_writer.write(im0) - - cap.release() - video_writer.release() - cv2.destroyAllWindows() - ``` - - === "Region Counting" + === "Python" ```python import cv2 @@ -173,51 +66,24 @@ A heatmap generated with [Ultralytics YOLO11](https://github.com/ultralytics/ult # Video writer video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) - # Define region points - region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] - - # Init heatmap - heatmap = solutions.Heatmap( - show=True, - model="yolo11n.pt", - colormap=cv2.COLORMAP_PARULA, - region=region_points, - ) - - while cap.isOpened(): - success, im0 = cap.read() - if not success: - print("Video frame is empty or video processing has been successfully completed.") - break - im0 = heatmap.generate_heatmap(im0) - video_writer.write(im0) - - cap.release() - video_writer.release() - cv2.destroyAllWindows() - ``` - - === "Specific Classes" - - ```python - import cv2 - - from ultralytics import solutions - - cap = cv2.VideoCapture("Path/to/video/file.mp4") - assert cap.isOpened(), "Error reading video file" - w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS)) - - # Video writer - video_writer = cv2.VideoWriter("heatmap_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) + # In case you want to apply object counting + heatmaps, you can pass region points. + # region_points = [(20, 400), (1080, 400)] # Define line points + # region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)] # Define region points + # region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360), (20, 400)] # Define polygon points # Init heatmap heatmap = solutions.Heatmap( - show=True, - model="yolo11n.pt", - classes=[0, 2], + show=True, # Display the output + model="yolo11n.pt", # Path to the YOLO11 model file + colormap=cv2.COLORMAP_PARULA, # Colormap of heatmap + # region=region_points, # If you want to do object counting with heatmaps, you can pass region_points + # classes=[0, 2], # If you want to generate heatmap for specific classes i.e person and car. + # show_in=True, # Display in counts + # show_out=True, # Display out counts + # line_width=2, # Adjust the line width for bounding boxes and text display ) + # Process video while cap.isOpened(): success, im0 = cap.read() if not success: diff --git a/docs/en/guides/object-counting.md b/docs/en/guides/object-counting.md index 144555793..a6ea9d923 100644 --- a/docs/en/guides/object-counting.md +++ b/docs/en/guides/object-counting.md @@ -19,7 +19,7 @@ Object counting with [Ultralytics YOLO11](https://github.com/ultralytics/ultraly allowfullscreen>
- Watch: Object Counting using Ultralytics YOLO11
+ Watch: Object Counting using Ultralytics YOLOv8
+ [YouTube video embed] Watch: In-Depth Guide to Text & Circle Annotations with Python Live Demos | Ultralytics Annotations 🚀

+ ```python import cv2 diff --git a/docs/mkdocs_github_authors.yaml b/docs/mkdocs_github_authors.yaml index 6d91127d5..3e650937f 100644 --- a/docs/mkdocs_github_authors.yaml +++ b/docs/mkdocs_github_authors.yaml @@ -10,6 +10,9 @@ 130829914+IvorZhu331@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/130829914?v=4 username: IvorZhu331 +131249114+ServiAmirPM@users.noreply.github.com: + avatar: https://avatars.githubusercontent.com/u/131249114?v=4 + username: ServiAmirPM 131261051+MatthewNoyce@users.noreply.github.com: avatar: https://avatars.githubusercontent.com/u/131261051?v=4 username: MatthewNoyce @@ -109,6 +112,9 @@ chr043416@gmail.com: davis.justin@mssm.org: avatar: https://avatars.githubusercontent.com/u/23462437?v=4 username: justincdavis +francesco.mttl@gmail.com: + avatar: https://avatars.githubusercontent.com/u/3855193?v=4 + username: ambitious-octopus glenn.jocher@ultralytics.com: avatar: https://avatars.githubusercontent.com/u/26833433?v=4 username: glenn-jocher diff --git a/docs/overrides/javascript/benchmark.js b/docs/overrides/javascript/benchmark.js new file mode 100644 index 000000000..65efb8635 --- /dev/null +++ b/docs/overrides/javascript/benchmark.js @@ -0,0 +1,199 @@ +// YOLO models chart --------------------------------------------------------------------------------------------------- +const data = { + YOLO11: { + n: { speed: 1.55, mAP: 39.5 }, + s: { speed: 2.63, mAP: 47.0 }, + m: { speed: 5.27, mAP: 51.4 }, + l: { speed: 6.84, mAP: 53.2 }, + x: { speed: 12.49, mAP: 54.7 }, + }, + YOLOv10: { + n: { speed: 1.56, mAP: 39.5 }, + s: { speed: 2.66, mAP: 46.7 }, + m: { speed: 5.48, mAP: 51.3 }, + b: { speed: 6.54, mAP: 52.7 }, + l: { speed: 8.33, mAP: 53.3 }, + x: { speed: 12.2, mAP: 54.4 }, + }, + YOLOv9: { + t: { speed: 2.3, mAP: 37.8 }, + s: { speed: 3.54, mAP: 46.5 }, + m: { speed: 6.43, mAP: 51.5 }, + c: { speed: 7.16, mAP: 52.8 }, + e: { speed: 16.77, mAP: 55.1 }, + }, + YOLOv8: { + n: { speed: 1.47, mAP: 37.3 }, + s: { speed: 2.66, mAP: 44.9 }, + m: { speed: 5.86, mAP: 50.2 }, + l: { speed: 9.06, mAP: 52.9 }, + x: { speed: 14.37, mAP: 53.9 }, + }, + YOLOv7: { l: { speed: 6.84, mAP: 51.4 }, x: { speed: 11.57, mAP: 53.1 } }, + "YOLOv6-3.0": { + n: { speed: 1.17, mAP: 37.5 }, + s: { speed: 2.66, mAP: 45.0 }, + m: { speed: 5.28, mAP: 50.0 }, + l: { speed: 8.95, mAP: 52.8 }, + }, + YOLOv5: { + s: { speed: 1.92, mAP: 37.4 }, + m: { speed: 4.03, mAP: 45.4 }, + l: { speed: 6.61, mAP: 49.0 }, + x: { speed: 11.89, mAP: 50.7 }, + }, + "PP-YOLOE+": { + t: { speed: 2.84, mAP: 39.9 }, + s: { speed: 2.62, mAP: 43.7 }, + m: { speed: 5.56, mAP: 49.8 }, + l: { speed: 8.36, mAP: 52.9 }, + x: { speed: 14.3, mAP: 54.7 }, + }, + "DAMO-YOLO": { + t: { speed: 2.32, mAP: 42.0 }, + s: { speed: 3.45, mAP: 46.0 }, + m: { speed: 5.09, mAP: 49.2 }, + l: { speed: 7.18, mAP: 50.8 }, + }, + YOLOX: { + s: { speed: 2.56, mAP: 40.5 }, + m: { speed: 5.43, mAP: 46.9 }, + l: { speed: 9.04, mAP: 49.7 }, + x: { speed: 16.1, mAP: 51.1 }, + }, + RTDETRv2: { + s: { speed: 5.03, mAP: 48.1 }, + m: { speed: 7.51, mAP: 51.9 }, + l: { speed: 9.76, mAP: 53.4 }, + x: { speed: 15.03, mAP: 54.3 }, + }, +}; + +let chart = null; // chart variable will hold the reference to the current chart instance. + +// Function to lighten a hex color by a specified amount. 
+function lightenHexColor(color, amount = 0.5) { + const r = parseInt(color.slice(1, 3), 16); + const g = parseInt(color.slice(3, 5), 16); + const b = parseInt(color.slice(5, 7), 16); + const newR = Math.min(255, Math.round(r + (255 - r) * amount)); + const newG = Math.min(255, Math.round(g + (255 - g) * amount)); + const newB = Math.min(255, Math.round(b + (255 - b) * amount)); + return `#${newR.toString(16).padStart(2, "0")}${newG.toString(16).padStart(2, "0")}${newB.toString(16).padStart(2, "0")}`; +} + +// Function to update the benchmarks chart. +function updateChart() { + if (chart) { + chart.destroy(); + } // If a chart instance already exists, destroy it. + + // Define a specific color map for models. + const colorMap = { + YOLO11: "#0b23a9", + YOLOv10: "#ff7f0e", + YOLOv9: "#2ca02c", + YOLOv8: "#d62728", + YOLOv7: "#9467bd", + "YOLOv6-3.0": "#8c564b", + YOLOv5: "#e377c2", + "PP-YOLOE+": "#7f7f7f", + "DAMO-YOLO": "#bcbd22", + YOLOX: "#17becf", + RTDETRv2: "#eccd22", + }; + + // Get the selected algorithms from the checkboxes. + const selectedAlgorithms = [ + ...document.querySelectorAll('input[name="algorithm"]:checked'), + ].map((e) => e.value); + + // Create the datasets for the selected algorithms. + const datasets = selectedAlgorithms.map((algorithm, i) => { + const baseColor = + colorMap[algorithm] || `hsl(${Math.random() * 360}, 70%, 50%)`; + const lineColor = i === 0 ? baseColor : lightenHexColor(baseColor, 0.6); // Lighten non-primary lines. + + return { + label: algorithm, // Label for the data points in the legend. + data: Object.entries(data[algorithm]).map(([version, point]) => ({ + x: point.speed, // Speed data points on the x-axis. + y: point.mAP, // mAP data points on the y-axis. + version: version.toUpperCase(), // Store the version as additional data. + })), + fill: false, // Don't fill the chart. + borderColor: lineColor, // Use the lightened color for the line. + tension: 0.3, // Smooth the line. + pointRadius: i === 0 ? 7 : 4, // Highlight primary dataset points. + pointHoverRadius: i === 0 ? 9 : 6, // Highlight hover for primary dataset. + pointBackgroundColor: lineColor, // Fill points with the line color. + pointBorderColor: "#ffffff", // Add a border around points for contrast. + borderWidth: i === 0 ? 3 : 1.5, // Slightly increase line size for the primary dataset. + }; + }); + + if (datasets.length === 0) { + return; + } // If there are no selected algorithms, return without creating a new chart. + + // Create a new chart instance. + chart = new Chart(document.getElementById("chart").getContext("2d"), { + type: "line", // Set the chart type to line. + data: { datasets }, + options: { + plugins: { + legend: { + display: true, + position: "top", + labels: { color: "#808080" }, + }, // Configure the legend. + tooltip: { + callbacks: { + label: (tooltipItem) => { + const { dataset, dataIndex } = tooltipItem; + const point = dataset.data[dataIndex]; + return `${dataset.label}${point.version.toLowerCase()}: Speed = ${point.x}, mAP = ${point.y}`; // Custom tooltip label. + }, + }, + mode: "nearest", + intersect: false, + }, // Configure the tooltip. + }, + interaction: { mode: "nearest", axis: "x", intersect: false }, // Configure the interaction mode. + scales: { + x: { + type: "linear", + position: "bottom", + title: { + display: true, + text: "Latency T4 TensorRT10 FP16 (ms/img)", + color: "#808080", + }, // X-axis title. + grid: { color: "#e0e0e0" }, // Grid line color. + ticks: { color: "#808080" }, // Tick label color. 
+ }, + y: { + title: { display: true, text: "mAP", color: "#808080" }, // Y-axis title. + grid: { color: "#e0e0e0" }, // Grid line color. + ticks: { color: "#808080" }, // Tick label color. + }, + }, + }, + }); +} + +document$.subscribe(function () { + function initializeApp() { + if (typeof Chart !== "undefined") { + document + .querySelectorAll('input[name="algorithm"]') + .forEach((checkbox) => + checkbox.addEventListener("change", updateChart), + ); + updateChart(); + } else { + setTimeout(initializeApp, 100); // Retry every 100ms + } + } + initializeApp(); // Initial chart rendering +}); diff --git a/docs/overrides/javascript/extra.js b/docs/overrides/javascript/extra.js index e2faf7986..2de7572e7 100644 --- a/docs/overrides/javascript/extra.js +++ b/docs/overrides/javascript/extra.js @@ -1,4 +1,4 @@ -// Apply theme based on user preference +// Apply theme colors based on dark/light mode const applyTheme = (isDark) => { document.body.setAttribute( "data-md-color-scheme", @@ -10,80 +10,74 @@ const applyTheme = (isDark) => { ); }; -// Check and apply auto theme -const checkAutoTheme = () => { - const supportedLangCodes = [ - "en", - "zh", - "ko", - "ja", - "ru", - "de", - "fr", - "es", - "pt", - "it", - "tr", - "vi", - "ar", - ]; - const langCode = window.location.pathname.split("/")[1]; - const localStorageKey = `${supportedLangCodes.includes(langCode) ? `/${langCode}` : ""}/.__palette`; - const palette = JSON.parse(localStorage.getItem(localStorageKey) || "{}"); - +// Check and apply appropriate theme based on system/user preference +const checkTheme = () => { + const palette = JSON.parse(localStorage.getItem(".__palette") || "{}"); if (palette.index === 0) { + // Auto mode is selected applyTheme(window.matchMedia("(prefers-color-scheme: dark)").matches); } }; -// Event listeners for theme changes -const mediaQueryList = window.matchMedia("(prefers-color-scheme: dark)"); -mediaQueryList.addListener(checkAutoTheme); - -// Initial theme check -checkAutoTheme(); +// Watch for system theme changes +window + .matchMedia("(prefers-color-scheme: dark)") + .addEventListener("change", checkTheme); -// Auto theme input listener +// Initialize theme handling on page load document.addEventListener("DOMContentLoaded", () => { - const autoThemeInput = document.getElementById("__palette_1"); - autoThemeInput?.addEventListener("click", () => { - if (autoThemeInput.checked) setTimeout(checkAutoTheme); - }); + // Watch for theme toggle changes + document + .getElementById("__palette_1") + ?.addEventListener( + "change", + (e) => e.target.checked && setTimeout(checkTheme), + ); + // Initial theme check + checkTheme(); }); -// Iframe navigation -window.onhashchange = () => { - window.parent.postMessage( - { - type: "navigation", - hash: - window.location.pathname + - window.location.search + - window.location.hash, - }, - "*", - ); -}; - -// Add Inkeep button +// Inkeep -------------------------------------------------------------------------------------------------------------- document.addEventListener("DOMContentLoaded", () => { + const enableSearchBar = true; + const inkeepScript = document.createElement("script"); - inkeepScript.src = "https://unpkg.com/@inkeep/uikit-js@0.3.11/dist/embed.js"; + inkeepScript.src = "https://unpkg.com/@inkeep/uikit-js@0.3.18/dist/embed.js"; inkeepScript.type = "module"; inkeepScript.defer = true; document.head.appendChild(inkeepScript); - // Configure and initialize the widget - const addInkeepWidget = () => { + if (enableSearchBar) { + const containerDiv = 
document.createElement("div"); + containerDiv.style.transform = "scale(0.7)"; + containerDiv.style.transformOrigin = "left center"; + + const inkeepDiv = document.createElement("div"); + inkeepDiv.id = "inkeepSearchBar"; + containerDiv.appendChild(inkeepDiv); + + const headerElement = document.querySelector(".md-header__inner"); + const searchContainer = headerElement.querySelector(".md-header__source"); + + if (headerElement && searchContainer) { + headerElement.insertBefore(containerDiv, searchContainer); + } + } + + // configure and initialize the widget + const addInkeepWidget = (componentType, targetElementId) => { const inkeepWidget = Inkeep().embed({ - componentType: "ChatButton", + componentType, + ...(componentType !== "ChatButton" + ? { targetElement: targetElementId } + : {}), colorModeSync: { observedElement: document.documentElement, isDarkModeCallback: (el) => { const currentTheme = el.getAttribute("data-color-mode"); return currentTheme === "dark"; }, - colorModeAttribute: "data-color-mode", + colorModeAttribute: "data-color-mode-scheme", }, properties: { chatButtonType: "PILL", @@ -99,13 +93,12 @@ document.addEventListener("DOMContentLoaded", () => { theme: { stylesheetUrls: ["/stylesheets/style.css"], }, - // ...optional settings }, modalSettings: { // optional settings }, searchSettings: { - // optional settings + placeholder: "Search", }, aiChatSettings: { chatSubjectName: "Ultralytics", @@ -144,97 +137,9 @@ document.addEventListener("DOMContentLoaded", () => { }); }; inkeepScript.addEventListener("load", () => { - addInkeepWidget(); // initialize the widget + const widgetContainer = document.getElementById("inkeepSearchBar"); + + addInkeepWidget("ChatButton"); + widgetContainer && addInkeepWidget("SearchBar", "#inkeepSearchBar"); }); }); - -// This object contains the benchmark data for various object detection models. -const data = { - 'YOLOv5': {s: {speed: 1.92, mAP: 37.4}, m: {speed: 4.03, mAP: 45.4}, l: {speed: 6.61, mAP: 49.0}, x: {speed: 11.89, mAP: 50.7}}, - 'YOLOv6': {n: {speed: 1.17, mAP: 37.5}, s: {speed: 2.66, mAP: 45.0}, m: {speed: 5.28, mAP: 50.0}, l: {speed: 8.95, mAP: 52.8}}, - 'YOLOv7': {l: {speed: 6.84, mAP: 51.4}, x: {speed: 11.57, mAP: 53.1}}, - 'YOLOv8': {n: {speed: 1.47, mAP: 37.3}, s: {speed: 2.66, mAP: 44.9}, m: {speed: 5.86, mAP: 50.2}, l: {speed: 9.06, mAP: 52.9}, x: {speed: 14.37, mAP: 53.9}}, - 'YOLOv9': {t: {speed: 2.30, mAP: 37.8}, s: {speed: 3.54, mAP: 46.5}, m: {speed: 6.43, mAP: 51.5}, c: {speed: 7.16, mAP: 52.8}, e: {speed: 16.77, mAP: 55.1}}, - 'YOLOv10': {n: {speed: 1.56, mAP: 39.5}, s: {speed: 2.66, mAP: 46.7}, m: {speed: 5.48, mAP: 51.3}, b: {speed: 6.54, mAP: 52.7}, l: {speed: 8.33, mAP: 53.3}, x: {speed: 12.2, mAP: 54.4}}, - 'PPYOLOE': {t: {speed: 2.84, mAP: 39.9}, s: {speed: 2.62, mAP: 43.7}, m: {speed: 5.56, mAP: 49.8}, l: {speed: 8.36, mAP: 52.9}, x: {speed: 14.3, mAP: 54.7}}, - 'YOLO11': {n: {speed: 1.55, mAP: 39.5}, s: {speed: 2.63, mAP: 47.0}, m: {speed: 5.27, mAP: 51.4}, l: {speed: 6.84, mAP: 53.2}, x: {speed: 12.49, mAP: 54.7}} -}; - -let chart = null; // chart variable will hold the reference to the current chart instance. - -// This function is responsible for updating the benchmarks chart. -function updateChart() { - // If a chart instance already exists, destroy it. - if (chart) chart.destroy(); - - // Get the selected algorithms from the checkboxes. - const selectedAlgorithms = [...document.querySelectorAll('input[name="algorithm"]:checked')].map(e => e.value); - - // Create the datasets for the selected algorithms. 
- const datasets = selectedAlgorithms.map((algorithm, index) => ({ - label: algorithm, // Label for the data points in the legend. - data: Object.entries(data[algorithm]).map(([version, point]) => ({ - x: point.speed, // Speed data points on the x-axis. - y: point.mAP, // mAP data points on the y-axis. - version: version.toUpperCase() // Store the version as additional data. - })), - fill: false, // Don't fill the chart. - borderColor: `hsl(${index * 90}, 70%, 50%)`, // Assign a unique color to each dataset. - tension: 0.3, // Smooth the line. - pointRadius: 5, // Increase the dot size. - pointHoverRadius: 10, // Increase the dot size on hover. - borderWidth: 2 // Set the line thickness. - })); - - // If there are no selected algorithms, return without creating a new chart. - if (datasets.length === 0) return; - - // Create a new chart instance. - chart = new Chart(document.getElementById('chart').getContext('2d'), { - type: 'line', // Set the chart type to line. - data: { datasets }, - options: { - plugins: { - legend: { display: true, position: 'top', labels: {color: '#808080'} }, // Configure the legend. - tooltip: { - callbacks: { - label: (tooltipItem) => { - const { dataset, dataIndex } = tooltipItem; - const point = dataset.data[dataIndex]; - return `${dataset.label}${point.version.toLowerCase()}: Speed = ${point.x}, mAP = ${point.y}`; // Custom tooltip label. - } - }, - mode: 'nearest', - intersect: false - } // Configure the tooltip. - }, - interaction: { mode: 'nearest', axis: 'x', intersect: false }, // Configure the interaction mode. - scales: { - x: { - type: 'linear', position: 'bottom', - title: { display: true, text: 'Latency T4 TensorRT10 FP16 (ms/img)', color: '#808080'}, // X-axis title. - grid: { color: '#e0e0e0' }, // Grid line color. - ticks: { color: '#808080' } // Tick label color. - }, - y: { - title: { display: true, text: 'mAP', color: '#808080'}, // Y-axis title. - grid: { color: '#e0e0e0' }, // Grid line color. - ticks: { color: '#808080' } // Tick label color. 
- } - } - } - }); -} - -// Poll for Chart.js to load, then initialize checkboxes and chart -function initializeApp() { - if (typeof Chart !== 'undefined') { - document.querySelectorAll('input[name="algorithm"]').forEach(checkbox => - checkbox.addEventListener('change', updateChart) - ); - updateChart(); - } else { - setTimeout(initializeApp, 100); // Retry every 100ms - } -} -document.addEventListener("DOMContentLoaded", initializeApp); // Initial chart rendering on page load diff --git a/docs/overrides/javascript/giscus.js b/docs/overrides/javascript/giscus.js index a64e29734..b57e4437d 100644 --- a/docs/overrides/javascript/giscus.js +++ b/docs/overrides/javascript/giscus.js @@ -1,7 +1,9 @@ // Giscus functionality function loadGiscus() { const giscusContainer = document.getElementById("giscus-container"); - if (!giscusContainer || giscusContainer.querySelector("script")) return; + if (!giscusContainer || giscusContainer.querySelector("script")) { + return; + } const script = document.createElement("script"); script.src = "https://giscus.app/client.js"; @@ -55,14 +57,17 @@ function setupGiscusLoader() { const giscusContainer = document.getElementById("giscus-container"); if (giscusContainer) { - const observer = new IntersectionObserver((entries) => { - entries.forEach((entry) => { - if (entry.isIntersecting) { - loadGiscus(); - observer.unobserve(entry.target); - } - }); - }, { threshold: 0.1 }); // Trigger when 10% of the element is visible + const observer = new IntersectionObserver( + (entries) => { + entries.forEach((entry) => { + if (entry.isIntersecting) { + loadGiscus(); + observer.unobserve(entry.target); + } + }); + }, + { threshold: 0.1 }, + ); // Trigger when 10% of the element is visible observer.observe(giscusContainer); } diff --git a/docs/overrides/stylesheets/style.css b/docs/overrides/stylesheets/style.css index d10582db4..5c9f3c22d 100644 --- a/docs/overrides/stylesheets/style.css +++ b/docs/overrides/stylesheets/style.css @@ -265,8 +265,15 @@ div.highlight { } /* MkDocs Ultralytics Plugin ---------------------------------------------------------------------------------------- */ -/* Inkeep button font color ----------------------------------------------------------------------------------------- */ +/* Inkeep ----------------------------------------------------------------------------------------------------------- */ .ikp-floating-button { color: #111f68; } -/* Inkeep button ---------------------------------------------------------------------------------------------------- */ +#inkeepSearchBar { + transition: all 0.2s ease-in-out; +} +#inkeepSearchBar:hover { + transform: scale(1.1); + filter: brightness(1.2); +} +/* Inkeep ----------------------------------------------------------------------------------------------------------- */ diff --git a/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py b/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py index 4243cc35b..9e0ba13d9 100644 --- a/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py +++ b/examples/YOLOv8-SAHI-Inference-Video/yolov8_sahi.py @@ -64,7 +64,7 @@ class SAHIInference: break annotator = Annotator(frame) # Initialize annotator for plotting detection and tracking results results = get_sliced_prediction( - frame, + frame[..., ::-1], self.detection_model, slice_height=512, slice_width=512, diff --git a/examples/heatmaps.ipynb b/examples/heatmaps.ipynb index d0124df89..b4c94c35f 100644 --- a/examples/heatmaps.ipynb +++ b/examples/heatmaps.ipynb @@ -38,7 +38,7 @@ "\n", "Pip install `ultralytics` and 
[dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n", "\n", - "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" + "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" ] }, { diff --git a/examples/hub.ipynb b/examples/hub.ipynb index 03382596c..bf7e27f97 100644 --- a/examples/hub.ipynb +++ b/examples/hub.ipynb @@ -36,7 +36,7 @@ "\n", "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n", "\n", - "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" + "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" ] }, { diff --git a/examples/object_counting.ipynb b/examples/object_counting.ipynb index e742cff6a..83f245d60 100644 --- a/examples/object_counting.ipynb +++ b/examples/object_counting.ipynb @@ -38,7 +38,7 @@ "\n", "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n", "\n", - "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" + "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" ] }, { diff --git a/examples/object_tracking.ipynb b/examples/object_tracking.ipynb index cc4d03add..d7f4d42b8 100644 --- a/examples/object_tracking.ipynb +++ b/examples/object_tracking.ipynb @@ -38,7 +38,7 @@ "\n", "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n", "\n", - "[![PyPI - 
Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" + "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" ] }, { diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb index 75dd455e9..5eadcf3e6 100644 --- a/examples/tutorial.ipynb +++ b/examples/tutorial.ipynb @@ -55,7 +55,7 @@ "\n", "Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware.\n", "\n", - "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://pepy.tech/project/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" + "[![PyPI - Version](https://img.shields.io/pypi/v/ultralytics?logo=pypi&logoColor=white)](https://pypi.org/project/ultralytics/) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ultralytics?logo=python&logoColor=gold)](https://pypi.org/project/ultralytics/)" ] }, { diff --git a/mkdocs.yml b/mkdocs.yml index 20d8ec3bf..7046ba43f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -291,6 +291,7 @@ nav: - COCO8-pose: datasets/pose/coco8-pose.md - Tiger-pose: datasets/pose/tiger-pose.md - Hand-keypoints: datasets/pose/hand-keypoints.md + - Dog-pose: datasets/pose/dog-pose.md - Classification: - datasets/classify/index.md - Caltech 101: datasets/classify/caltech101.md @@ -412,12 +413,14 @@ nav: - TF.js: integrations/tfjs.md - TFLite: integrations/tflite.md - TFLite Edge TPU: integrations/edge-tpu.md + - Sony IMX500: integrations/sony-imx500.md - TensorBoard: integrations/tensorboard.md - TensorRT: integrations/tensorrt.md - TorchScript: integrations/torchscript.md - VS Code: integrations/vscode.md - Weights & Biases: integrations/weights-biases.md - Albumentations: integrations/albumentations.md + - SONY IMX500: integrations/sony-imx500.md - HUB: - hub/index.md - Web: @@ -559,7 +562,6 @@ nav: - utils: reference/nn/modules/utils.md - tasks: reference/nn/tasks.md - solutions: - - solutions: reference/solutions/solutions.md - ai_gym: reference/solutions/ai_gym.md - analytics: reference/solutions/analytics.md - distance_calculation: reference/solutions/distance_calculation.md @@ -567,8 +569,10 @@ nav: - object_counter: reference/solutions/object_counter.md - parking_management: reference/solutions/parking_management.md - queue_management: reference/solutions/queue_management.md + - solutions: reference/solutions/solutions.md - speed_estimation: reference/solutions/speed_estimation.md - streamlit_inference: reference/solutions/streamlit_inference.md + - region_counter: reference/solutions/region_counter.md - trackers: - basetrack: 
reference/trackers/basetrack.md - bot_sort: reference/trackers/bot_sort.md @@ -624,8 +628,8 @@ nav: # Plugins including 301 redirects navigation --------------------------------------------------------------------------- plugins: - macros - - search: - lang: en + # - search: + # lang: en - mkdocstrings: enabled: true default_handler: python diff --git a/tests/test_exports.py b/tests/test_exports.py index 5a54b1afa..e540e7d75 100644 --- a/tests/test_exports.py +++ b/tests/test_exports.py @@ -205,3 +205,12 @@ def test_export_ncnn(): """Test YOLO exports to NCNN format.""" file = YOLO(MODEL).export(format="ncnn", imgsz=32) YOLO(file)(SOURCE, imgsz=32) # exported model inference + + +@pytest.mark.skipif(True, reason="Test disabled as keras and tensorflow version conflicts with tflite export.") +@pytest.mark.skipif(not LINUX or MACOS, reason="Skipping test on Windows and macOS") +def test_export_imx(): + """Test YOLOv8n exports to IMX format.""" + model = YOLO("yolov8n.pt") + file = model.export(format="imx", imgsz=32) + YOLO(file)(SOURCE, imgsz=32) diff --git a/tests/test_solutions.py b/tests/test_solutions.py index e01da6d81..fbf6b954c 100644 --- a/tests/test_solutions.py +++ b/tests/test_solutions.py @@ -16,7 +16,7 @@ def test_major_solutions(): safe_download(url=MAJOR_SOLUTIONS_DEMO) cap = cv2.VideoCapture("solutions_ci_demo.mp4") assert cap.isOpened(), "Error reading video file" - region_points = [(20, 400), (1080, 404), (1080, 360), (20, 360)] + region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)] counter = solutions.ObjectCounter(region=region_points, model="yolo11n.pt", show=False) # Test object counter heatmap = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, model="yolo11n.pt", show=False) # Test heatmaps speed = solutions.SpeedEstimator(region=region_points, model="yolo11n.pt", show=False) # Test speed estimation diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index f6b1d2e78..fe22ab07a 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = "8.3.28" +__version__ = "8.3.38" import os diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index c0675620b..de9ef96a1 100644 --- a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -83,13 +83,13 @@ SOLUTIONS_HELP_MSG = f""" See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg' 1. Call object counting solution - yolo solutions count source="path/to/video/file.mp4" region=[(20, 400), (1080, 404), (1080, 360), (20, 360)] + yolo solutions count source="path/to/video/file.mp4" region=[(20, 400), (1080, 400), (1080, 360), (20, 360)] 2. Call heatmaps solution yolo solutions heatmap colormap=cv2.COLORMAP_PARULA model=yolo11n.pt 3. Call queue management solution - yolo solutions queue region=[(20, 400), (1080, 404), (1080, 360), (20, 360)] model=yolo11n.pt + yolo solutions queue region=[(20, 400), (1080, 400), (1080, 360), (20, 360)] model=yolo11n.pt 4.
Call workouts monitoring solution for push-ups yolo solutions workout model=yolo11n-pose.pt kpts=[6, 8, 10] @@ -160,7 +160,6 @@ CFG_FRACTION_KEYS = { # fractional float arguments with 0.0<=values<=1.0 "weight_decay", "warmup_momentum", "warmup_bias_lr", - "label_smoothing", "hsv_h", "hsv_s", "hsv_v", @@ -436,6 +435,9 @@ def _handle_deprecation(custom): if key == "line_thickness": deprecation_warn(key, "line_width") custom["line_width"] = custom.pop("line_thickness") + if key == "label_smoothing": + deprecation_warn(key) + custom.pop("label_smoothing") return custom @@ -671,6 +673,9 @@ def handle_yolo_solutions(args: List[str]) -> None: ) s_n = "count" # Default solution if none provided + if args and args[0] == "help": # Return early if the user calls `yolo solutions help` + return + cls, method = SOLUTION_MAP[s_n] # solution class name, method name and default source from ultralytics import solutions # import ultralytics solutions @@ -735,9 +740,8 @@ def parse_key_value_pair(pair: str = "key=value"): pair (str): A string containing a key-value pair in the format "key=value". Returns: - (tuple): A tuple containing two elements: - - key (str): The parsed key. - - value (str): The parsed value. + key (str): The parsed key. + value (str): The parsed value. Raises: AssertionError: If the value is missing or empty. diff --git a/ultralytics/cfg/datasets/dog-pose.yaml b/ultralytics/cfg/datasets/dog-pose.yaml new file mode 100644 index 000000000..4dab70a59 --- /dev/null +++ b/ultralytics/cfg/datasets/dog-pose.yaml @@ -0,0 +1,23 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# Dogs dataset http://vision.stanford.edu/aditya86/ImageNetDogs/ by Stanford +# Documentation: https://docs.ultralytics.com/datasets/pose/dog-pose/ +# Example usage: yolo train data=dog-pose.yaml +# parent +# ├── ultralytics +# └── datasets +# └── dog-pose ← downloads here (337 MB) + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/dog-pose # dataset root dir +train: train # train images (relative to 'path') 6773 images +val: val # val images (relative to 'path') 1703 images + +# Keypoints +kpt_shape: [24, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible) + +# Classes +names: + 0: dog + +# Download script/URL (optional) +download: https://github.com/ultralytics/assets/releases/download/v0.0.0/dog-pose.zip diff --git a/ultralytics/cfg/default.yaml b/ultralytics/cfg/default.yaml index 2ef1f4284..0423366f7 100644 --- a/ultralytics/cfg/default.yaml +++ b/ultralytics/cfg/default.yaml @@ -83,7 +83,7 @@ int8: False # (bool) CoreML/TF INT8 quantization dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes simplify: True # (bool) ONNX: simplify model using `onnxslim` opset: # (int, optional) ONNX: opset version -workspace: 4 # (int) TensorRT: workspace size (GB) +workspace: None # (float, optional) TensorRT: workspace size (GiB), `None` will let TensorRT auto-allocate memory nms: False # (bool) CoreML: add NMS # Hyperparameters ------------------------------------------------------------------------------------------------------ @@ -99,7 +99,6 @@ cls: 0.5 # (float) cls loss gain (scale with pixels) dfl: 1.5 # (float) dfl loss gain pose: 12.0 # (float) pose loss gain kobj: 1.0 # (float) keypoint obj loss gain -label_smoothing: 0.0 # (float) label smoothing (fraction) nbs: 64 # (int) nominal batch size hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction) hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction) diff --git a/ultralytics/cfg/solutions/default.yaml b/ultralytics/cfg/solutions/default.yaml index 69e430b8c..b50a2a325 100644 --- a/ultralytics/cfg/solutions/default.yaml +++ b/ultralytics/cfg/solutions/default.yaml @@ -2,7 +2,7 @@ # Configuration for Ultralytics Solutions # Object counting settings -region: # Object counting, queue or speed estimation region points. Default region points are [(20, 400), (1080, 404), (1080, 360), (20, 360)] +region: # Object counting, queue or speed estimation region points. Default region points are [(20, 400), (1080, 400), (1080, 360), (20, 360)] show_in: True # Flag to display objects moving *into* the defined region show_out: True # Flag to display objects moving *out of* the defined region diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py index 49bdc9223..5ec011d89 100644 --- a/ultralytics/data/augment.py +++ b/ultralytics/data/augment.py @@ -1591,7 +1591,7 @@ class LetterBox: labels["ratio_pad"] = (labels["ratio_pad"], (left, top)) # for evaluation if len(labels): - labels = self._update_labels(labels, ratio, dw, dh) + labels = self._update_labels(labels, ratio, left, top) labels["img"] = img labels["resized_shape"] = new_shape return labels @@ -2111,10 +2111,9 @@ class Format: h (int): Height of the image. Returns: - (tuple): Tuple containing: - masks (numpy.ndarray): Bitmap masks with shape (N, H, W) or (1, H, W) if mask_overlap is True. - instances (Instances): Updated instances object with sorted segments if mask_overlap is True. - cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True. + masks (numpy.ndarray): Bitmap masks with shape (N, H, W) or (1, H, W) if mask_overlap is True. + instances (Instances): Updated instances object with sorted segments if mask_overlap is True. + cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True. Notes: - If self.mask_overlap is True, masks are overlapped and sorted by area. 
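The new dog-pose.yaml added above follows the standard Ultralytics pose-dataset layout, so it can be consumed directly by the usual training API. A minimal sketch, assuming a pretrained pose checkpoint such as yolo11n-pose.pt and illustrative epochs/imgsz values (none of these values are part of this diff):

```python
from ultralytics import YOLO

# Minimal sketch: train and validate a pose model on the Dog-pose dataset config added above.
# "yolo11n-pose.pt", epochs=100 and imgsz=640 are illustrative assumptions, not values from this diff.
model = YOLO("yolo11n-pose.pt")  # load a pretrained pose checkpoint
results = model.train(data="dog-pose.yaml", epochs=100, imgsz=640)  # dataset auto-downloads via the yaml's download URL
metrics = model.val()  # evaluate on the val split defined in dog-pose.yaml
```

With kpt_shape: [24, 3], each label supplies 24 keypoints with x, y and a visibility flag, which is the format the pose head expects.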
@@ -2280,7 +2279,7 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False): Args: dataset (Dataset): The dataset object containing image data and annotations. imgsz (int): The target image size for resizing. - hyp (Dict): A dictionary of hyperparameters controlling various aspects of the transformations. + hyp (Namespace): A namespace of hyperparameters controlling various aspects of the transformations. stretch (bool): If True, applies stretching to the image. If False, uses LetterBox resizing. Returns: @@ -2288,8 +2287,9 @@ Examples: >>> from ultralytics.data.dataset import YOLODataset + >>> from ultralytics.utils import IterableSimpleNamespace >>> dataset = YOLODataset(img_path="path/to/images", imgsz=640) - >>> hyp = {"mosaic": 1.0, "copy_paste": 0.5, "degrees": 10.0, "translate": 0.2, "scale": 0.9} + >>> hyp = IterableSimpleNamespace(mosaic=1.0, copy_paste=0.5, degrees=10.0, translate=0.2, scale=0.9) >>> transforms = v8_transforms(dataset, imgsz=640, hyp=hyp) >>> augmented_data = transforms(dataset[0]) """ diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py index fa5821418..e854c60de 100644 --- a/ultralytics/data/converter.py +++ b/ultralytics/data/converter.py @@ -577,7 +577,7 @@ def merge_multi_segment(segments): return s -def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): +def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt", device=None): """ Converts existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB) in YOLO format. Generates segmentation data using SAM auto-annotator as needed. @@ -587,6 +587,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): save_dir (str | Path): Path to save the generated labels, labels will be saved into `labels-segment` in the same directory level of `im_dir` if save_dir is None. Default: None. sam_model (str): Segmentation model to use for intermediate segmentation data; optional. + device (int | str): The specific device on which to run SAM models. Default: None.
Notes: The input directory structure assumed for dataset: @@ -621,7 +622,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): boxes[:, [0, 2]] *= w boxes[:, [1, 3]] *= h im = cv2.imread(label["im_file"]) - sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False) + sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False, device=device) label["segments"] = sam_results[0].masks.xyn save_dir = Path(save_dir) if save_dir else Path(im_dir).parent / "labels-segment" @@ -636,8 +637,8 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"): continue line = (int(cls[i]), *s.reshape(-1)) texts.append(("%g " * len(line)).rstrip() % line) - with open(txt_file, "a") as f: - f.writelines(text + "\n" for text in texts) + with open(txt_file, "a") as f: + f.writelines(text + "\n" for text in texts) LOGGER.info(f"Generated segment labels saved in {save_dir}") diff --git a/ultralytics/data/loaders.py b/ultralytics/data/loaders.py index ead7d6138..ae5677cc6 100644 --- a/ultralytics/data/loaders.py +++ b/ultralytics/data/loaders.py @@ -354,7 +354,7 @@ class LoadImagesAndVideos: self.nf = ni + nv # number of files self.ni = ni # number of images self.video_flag = [False] * ni + [True] * nv - self.mode = "image" + self.mode = "video" if ni == 0 else "image" # default to video if no images self.vid_stride = vid_stride # video frame-rate stride self.bs = batch if any(videos): diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index 00a7b6c7a..c0e29e7e1 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -18,6 +18,7 @@ TensorFlow.js | `tfjs` | yolo11n_web_model/ PaddlePaddle | `paddle` | yolo11n_paddle_model/ MNN | `mnn` | yolo11n.mnn NCNN | `ncnn` | yolo11n_ncnn_model/ +IMX | `imx` | yolo11n_imx_model/ Requirements: $ pip install "ultralytics[export]" @@ -44,6 +45,7 @@ Inference: yolo11n_paddle_model # PaddlePaddle yolo11n.mnn # MNN yolo11n_ncnn_model # NCNN + yolo11n_imx_model # IMX TensorFlow.js: $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example @@ -77,7 +79,6 @@ from ultralytics.utils import ( ARM64, DEFAULT_CFG, IS_JETSON, - IS_RASPBERRYPI, LINUX, LOGGER, MACOS, @@ -94,7 +95,7 @@ from ultralytics.utils.checks import check_imgsz, check_is_path_safe, check_requ from ultralytics.utils.downloads import attempt_download_asset, get_github_assets, safe_download from ultralytics.utils.files import file_size, spaces_in_path from ultralytics.utils.ops import Profile -from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device, smart_inference_mode +from ultralytics.utils.torch_utils import TORCH_1_13, get_latest_opset, select_device def export_formats(): @@ -114,6 +115,7 @@ def export_formats(): ["PaddlePaddle", "paddle", "_paddle_model", True, True], ["MNN", "mnn", ".mnn", True, True], ["NCNN", "ncnn", "_ncnn_model", True, True], + ["IMX", "imx", "_imx_model", True, True], ] return dict(zip(["Format", "Argument", "Suffix", "CPU", "GPU"], zip(*x))) @@ -171,7 +173,6 @@ class Exporter: self.callbacks = _callbacks or callbacks.get_default_callbacks() callbacks.add_integration_callbacks(self) - @smart_inference_mode() def __call__(self, model=None) -> str: """Returns list of exported files/dirs after running callbacks.""" self.run_callbacks("on_export_start") @@ -194,9 +195,22 @@ class Exporter: flags = [x == fmt for x in fmts] if sum(flags) != 1: raise ValueError(f"Invalid export format='{fmt}'. 
Valid formats are {fmts}") - jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, mnn, ncnn = ( - flags # export booleans - ) + ( + jit, + onnx, + xml, + engine, + coreml, + saved_model, + pb, + tflite, + edgetpu, + tfjs, + paddle, + mnn, + ncnn, + imx, + ) = flags # export booleans is_tf_format = any((saved_model, pb, tflite, edgetpu, tfjs)) # Device @@ -206,10 +220,14 @@ class Exporter: self.args.device = "0" if fmt == "engine" and "dla" in str(self.args.device): # convert int/list to str first dla = self.args.device.split(":")[-1] + self.args.device = "0" # update device to "0" assert dla in {"0", "1"}, f"Expected self.args.device='dla:0' or 'dla:1, but got {self.args.device}." self.device = select_device("cpu" if self.args.device is None else self.args.device) # Checks + if imx and not self.args.int8: + LOGGER.warning("WARNING ⚠️ IMX only supports int8 export, setting int8=True.") + self.args.int8 = True if not hasattr(model, "names"): model.names = default_class_names() model.names = check_class_names(model.names) @@ -247,8 +265,7 @@ class Exporter: "WARNING ⚠️ INT8 export requires a missing 'data' arg for calibration. " f"Using default 'data={self.args.data}'." ) - if mnn and (IS_RASPBERRYPI or IS_JETSON): - raise SystemError("MNN export not supported on Raspberry Pi and NVIDIA Jetson") + # Input im = torch.zeros(self.args.batch, 3, *self.imgsz).to(self.device) file = Path( @@ -264,6 +281,11 @@ class Exporter: model.eval() model.float() model = model.fuse() + + if imx: + from ultralytics.utils.torch_utils import FXModel + + model = FXModel(model) for m in model.modules(): if isinstance(m, (Detect, RTDETRDecoder)): # includes all Detect subclasses like Segment, Pose, OBB m.dynamic = self.args.dynamic @@ -273,6 +295,15 @@ class Exporter: elif isinstance(m, C2f) and not is_tf_format: # EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph m.forward = m.forward_split + if isinstance(m, Detect) and imx: + from ultralytics.utils.tal import make_anchors + + m.anchors, m.strides = ( + x.transpose(0, 1) + for x in make_anchors( + torch.cat([s / m.stride.unsqueeze(-1) for s in self.imgsz], dim=1), m.stride, 0.5 + ) + ) y = None for _ in range(2): @@ -347,6 +378,8 @@ class Exporter: f[11], _ = self.export_mnn() if ncnn: # NCNN f[12], _ = self.export_ncnn() + if imx: + f[13], _ = self.export_imx() # Finish f = [str(x) for x in f if x] # filter out '' and None @@ -469,8 +502,7 @@ class Exporter: @try_export def export_openvino(self, prefix=colorstr("OpenVINO:")): """YOLO OpenVINO export.""" - # WARNING: numpy>=2.0.0 issue with OpenVINO on macOS https://github.com/ultralytics/ultralytics/pull/17221 - check_requirements(f'openvino{"<=2024.0.0" if ARM64 else ">=2024.0.0"}') # fix OpenVINO issue on ARM64 + check_requirements("openvino>=2024.5.0") import openvino as ov LOGGER.info(f"\n{prefix} starting export with openvino {ov.__version__}...") @@ -498,7 +530,7 @@ class Exporter: if self.args.int8: fq = str(self.file).replace(self.file.suffix, f"_int8_openvino_model{os.sep}") fq_ov = str(Path(fq) / self.file.with_suffix(".xml").name) - check_requirements("nncf>=2.8.0") + check_requirements("nncf>=2.14.0") import nncf def transform_fn(data_item) -> np.ndarray: @@ -568,8 +600,7 @@ class Exporter: f = str(self.file.with_suffix(".mnn")) # MNN model file args = ["", "-f", "ONNX", "--modelFile", f_onnx, "--MNNModel", f, "--bizCode", json.dumps(self.metadata)] if self.args.int8: - args.append("--weightQuantBits") - args.append("8") + 
args.extend(("--weightQuantBits", "8")) if self.args.half: args.append("--fp16") mnnconvert.convert(args) @@ -751,10 +782,10 @@ class Exporter: # Engine builder builder = trt.Builder(logger) config = builder.create_builder_config() - workspace = int(self.args.workspace * (1 << 30)) - if is_trt10: + workspace = int(self.args.workspace * (1 << 30)) if self.args.workspace is not None else 0 + if is_trt10 and workspace > 0: config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace) - else: # TensorRT versions 7, 8 + elif workspace > 0 and not is_trt10: # TensorRT versions 7, 8 config.max_workspace_size = workspace flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) network = builder.create_network(flag) @@ -793,7 +824,7 @@ class Exporter: LOGGER.warning(f"{prefix} WARNING ⚠️ 'dynamic=True' model requires max batch size, i.e. 'batch=16'") profile = builder.create_optimization_profile() min_shape = (1, shape[1], 32, 32) # minimum input shape - max_shape = (*shape[:2], *(int(max(1, self.args.workspace) * d) for d in shape[2:])) # max input shape + max_shape = (*shape[:2], *(int(max(1, workspace) * d) for d in shape[2:])) # max input shape for inp in inputs: profile.set_shape(inp.name, min=min_shape, opt=shape, max=max_shape) config.add_optimization_profile(profile) @@ -1069,6 +1100,137 @@ class Exporter: yaml_save(Path(f) / "metadata.yaml", self.metadata) # add metadata.yaml return f, None + @try_export + def export_imx(self, prefix=colorstr("IMX:")): + """YOLO IMX export.""" + gptq = False + assert LINUX, "export only supported on Linux. See https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera/documentation/imx500-converter" + if getattr(self.model, "end2end", False): + raise ValueError("IMX export is not supported for end2end models.") + if "C2f" not in self.model.__str__(): + raise ValueError("IMX export is only supported for YOLOv8 detection models") + check_requirements(("model-compression-toolkit==2.1.1", "sony-custom-layers==0.2.0", "tensorflow==2.12.0")) + check_requirements("imx500-converter[pt]==3.14.3") # Separate requirements for imx500-converter + + import model_compression_toolkit as mct + import onnx + from sony_custom_layers.pytorch.object_detection.nms import multiclass_nms + + try: + out = subprocess.run( + ["java", "--version"], check=True, capture_output=True + ) # Java 17 is required for imx500-converter + if "openjdk 17" not in str(out.stdout): + raise FileNotFoundError + except FileNotFoundError: + subprocess.run(["sudo", "apt", "install", "-y", "openjdk-17-jdk", "openjdk-17-jre"], check=True) + + def representative_dataset_gen(dataloader=self.get_int8_calibration_dataloader(prefix)): + for batch in dataloader: + img = batch["img"] + img = img / 255.0 + yield [img] + + tpc = mct.get_target_platform_capabilities( + fw_name="pytorch", target_platform_name="imx500", target_platform_version="v1" + ) + + config = mct.core.CoreConfig( + mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=10), + quantization_config=mct.core.QuantizationConfig(concat_threshold_update=True), + ) + + resource_utilization = mct.core.ResourceUtilization(weights_memory=3146176 * 0.76) + + quant_model = ( + mct.gptq.pytorch_gradient_post_training_quantization( # Perform Gradient-Based Post Training Quantization + model=self.model, + representative_data_gen=representative_dataset_gen, + target_resource_utilization=resource_utilization, + gptq_config=mct.gptq.get_pytorch_gptq_config(n_epochs=1000, use_hessian_based_weights=False), + 
core_config=config, + target_platform_capabilities=tpc, + )[0] + if gptq + else mct.ptq.pytorch_post_training_quantization( # Perform post training quantization + in_module=self.model, + representative_data_gen=representative_dataset_gen, + target_resource_utilization=resource_utilization, + core_config=config, + target_platform_capabilities=tpc, + )[0] + ) + + class NMSWrapper(torch.nn.Module): + def __init__( + self, + model: torch.nn.Module, + score_threshold: float = 0.001, + iou_threshold: float = 0.7, + max_detections: int = 300, + ): + """ + Wrapping PyTorch Module with multiclass_nms layer from sony_custom_layers. + + Args: + model (nn.Module): Model instance. + score_threshold (float): Score threshold for non-maximum suppression. + iou_threshold (float): Intersection over union threshold for non-maximum suppression. + max_detections (int): The number of detections to return. + """ + super().__init__() + self.model = model + self.score_threshold = score_threshold + self.iou_threshold = iou_threshold + self.max_detections = max_detections + + def forward(self, images): + # model inference + outputs = self.model(images) + + boxes = outputs[0] + scores = outputs[1] + nms = multiclass_nms( + boxes=boxes, + scores=scores, + score_threshold=self.score_threshold, + iou_threshold=self.iou_threshold, + max_detections=self.max_detections, + ) + return nms + + quant_model = NMSWrapper( + model=quant_model, + score_threshold=self.args.conf or 0.001, + iou_threshold=self.args.iou, + max_detections=self.args.max_det, + ).to(self.device) + + f = Path(str(self.file).replace(self.file.suffix, "_imx_model")) + f.mkdir(exist_ok=True) + onnx_model = f / Path(str(self.file).replace(self.file.suffix, "_imx.onnx")) # ONNX model path + mct.exporter.pytorch_export_model( + model=quant_model, save_model_path=onnx_model, repr_dataset=representative_dataset_gen + ) + + model_onnx = onnx.load(onnx_model) # load onnx model + for k, v in self.metadata.items(): + meta = model_onnx.metadata_props.add() + meta.key, meta.value = k, str(v) + + onnx.save(model_onnx, onnx_model) + + subprocess.run( + ["imxconv-pt", "-i", str(onnx_model), "-o", str(f), "--no-input-persistency", "--overwrite-output"], + check=True, + ) + + # Needed for imx models. + with open(f / "labels.txt", "w") as file: + file.writelines([f"{name}\n" for _, name in self.model.names.items()]) + + return f, None + def _add_tflite_metadata(self, file): """Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata.""" import flatbuffers diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index c5b63eed8..667b54eba 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -2,7 +2,7 @@ import inspect from pathlib import Path -from typing import List, Union +from typing import Dict, List, Union import numpy as np import torch @@ -881,7 +881,7 @@ class Model(nn.Module): return self @property - def names(self) -> list: + def names(self) -> Dict[int, str]: """ Retrieves the class names associated with the loaded model. @@ -1126,3 +1126,20 @@ class Model(nn.Module): description of the expected behavior and structure. """ raise NotImplementedError("Please provide task map for your model!") + + def eval(self): + """ + Sets the model to evaluation mode. + + This method changes the model's mode to evaluation, which affects layers like dropout and batch normalization + that behave differently during training and evaluation. + + Returns: + (Model): The model instance with evaluation mode set.
+ + Examples: + >>> model = YOLO("yolo11n.pt") + >>> model.eval() + """ + self.model.eval() + return self diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py index 739ff3d77..5c9c0210a 100644 --- a/ultralytics/engine/predictor.py +++ b/ultralytics/engine/predictor.py @@ -153,7 +153,11 @@ class BasePredictor: (list): A list of transformed images. """ same_shapes = len({x.shape for x in im}) == 1 - letterbox = LetterBox(self.imgsz, auto=same_shapes and self.model.pt, stride=self.model.stride) + letterbox = LetterBox( + self.imgsz, + auto=same_shapes and (self.model.pt or getattr(self.model, "dynamic", False)), + stride=self.model.stride, + ) return [letterbox(image=x) for x in im] def postprocess(self, preds, img, orig_imgs): diff --git a/ultralytics/engine/results.py b/ultralytics/engine/results.py index 7d8192d63..8de0a2e6a 100644 --- a/ultralytics/engine/results.py +++ b/ultralytics/engine/results.py @@ -535,9 +535,9 @@ class Results(SimpleClass): # Plot Detect results if pred_boxes is not None and show_boxes: for i, d in enumerate(reversed(pred_boxes)): - c, conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item()) + c, d_conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item()) name = ("" if id is None else f"id:{id} ") + names[c] - label = (f"{name} {conf:.2f}" if conf else name) if labels else None + label = (f"{name} {d_conf:.2f}" if conf else name) if labels else None box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze() annotator.box_label( box, @@ -750,7 +750,7 @@ class Results(SimpleClass): save_one_box( d.xyxy, self.orig_img.copy(), - file=Path(save_dir) / self.names[int(d.cls)] / f"{Path(file_name)}.jpg", + file=Path(save_dir) / self.names[int(d.cls)] / Path(file_name).with_suffix(".jpg"), BGR=True, ) diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py index 068274a42..c088111fd 100644 --- a/ultralytics/engine/trainer.py +++ b/ultralytics/engine/trainer.py @@ -279,12 +279,7 @@ class BaseTrainer: # Batch size if self.batch_size < 1 and RANK == -1: # single-GPU only, estimate best batch size - self.args.batch = self.batch_size = check_train_batch_size( - model=self.model, - imgsz=self.args.imgsz, - amp=self.amp, - batch=self.batch_size, - ) + self.args.batch = self.batch_size = self.auto_batch() # Dataloaders batch_size = self.batch_size // max(world_size, 1) @@ -478,6 +473,16 @@ class BaseTrainer: self._clear_memory() self.run_callbacks("teardown") + def auto_batch(self, max_num_obj=0): + """Get batch size by calculating memory occupation of model.""" + return check_train_batch_size( + model=self.model, + imgsz=self.args.imgsz, + amp=self.amp, + batch=self.batch_size, + max_num_obj=max_num_obj, + ) # returns batch size + def _get_memory(self): """Get accelerator memory utilization in GB.""" if self.device.type == "mps": @@ -792,7 +797,7 @@ class BaseTrainer: g[0].append(param) optimizers = {"Adam", "Adamax", "AdamW", "NAdam", "RAdam", "RMSProp", "SGD", "auto"} - name = {x.lower(): x for x in optimizers}.get(name.lower(), None) + name = {x.lower(): x for x in optimizers}.get(name.lower()) if name in {"Adam", "Adamax", "AdamW", "NAdam", "RAdam"}: optimizer = getattr(optim, name, optim.Adam)(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0) elif name == "RMSProp": diff --git a/ultralytics/models/fastsam/predict.py b/ultralytics/models/fastsam/predict.py index 9910237b0..0fbe16ffe 100644 ---
a/ultralytics/models/fastsam/predict.py +++ b/ultralytics/models/fastsam/predict.py @@ -64,6 +64,9 @@ class FastSAMPredictor(SegmentationPredictor): if not isinstance(results, list): results = [results] for result in results: + if len(result) == 0: + prompt_results.append(result) + continue masks = result.masks.data if masks.shape[1:] != result.orig_shape: masks = scale_masks(masks[None], result.orig_shape)[0] diff --git a/ultralytics/models/rtdetr/train.py b/ultralytics/models/rtdetr/train.py index cb11a7279..962aeaf9b 100644 --- a/ultralytics/models/rtdetr/train.py +++ b/ultralytics/models/rtdetr/train.py @@ -68,8 +68,11 @@ class RTDETRTrainer(DetectionTrainer): hyp=self.args, rect=False, cache=self.args.cache or None, + single_cls=self.args.single_cls or False, prefix=colorstr(f"{mode}: "), + classes=self.args.classes, data=self.data, + fraction=self.args.fraction if mode == "train" else 1.0, ) def get_validator(self): diff --git a/ultralytics/models/sam/__init__.py b/ultralytics/models/sam/__init__.py index a29f5cb3f..30e34236a 100644 --- a/ultralytics/models/sam/__init__.py +++ b/ultralytics/models/sam/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license from .model import SAM -from .predict import Predictor, SAM2Predictor +from .predict import Predictor, SAM2Predictor, SAM2VideoPredictor -__all__ = "SAM", "Predictor", "SAM2Predictor" # tuple or list +__all__ = "SAM", "Predictor", "SAM2Predictor", "SAM2VideoPredictor" # tuple or list diff --git a/ultralytics/models/sam/model.py b/ultralytics/models/sam/model.py index e685dc4e4..97349a665 100644 --- a/ultralytics/models/sam/model.py +++ b/ultralytics/models/sam/model.py @@ -148,7 +148,7 @@ class SAM(Model): verbose (bool): If True, prints the information to the console. Returns: - (Tuple): A tuple containing the model's information (string representations of the model). + (tuple): A tuple containing the model's information (string representations of the model). Examples: >>> sam = SAM("sam_b.pt") diff --git a/ultralytics/models/sam/modules/sam.py b/ultralytics/models/sam/modules/sam.py index 7bfd71661..5d48ed1fa 100644 --- a/ultralytics/models/sam/modules/sam.py +++ b/ultralytics/models/sam/modules/sam.py @@ -36,8 +36,6 @@ class SAMModel(nn.Module): image_encoder (ImageEncoderViT): Backbone for encoding images into embeddings. prompt_encoder (PromptEncoder): Encoder for various types of input prompts. mask_decoder (MaskDecoder): Predicts object masks from image and prompt embeddings. - pixel_mean (torch.Tensor): Mean pixel values for image normalization, shape (3, 1, 1). - pixel_std (torch.Tensor): Standard deviation values for image normalization, shape (3, 1, 1). Methods: __init__: Initializes the SAMModel with encoders, decoder, and normalization parameters. @@ -349,8 +347,7 @@ class SAM2Model(torch.nn.Module): self.sam_prompt_embed_dim = self.hidden_dim self.sam_image_embedding_size = self.image_size // self.backbone_stride - # build PromptEncoder and MaskDecoder from SAM - # (their hyperparameters like `mask_in_chans=16` are from SAM code) + # Build PromptEncoder and MaskDecoder from SAM (hyperparameters like `mask_in_chans=16` are from SAM code) self.sam_prompt_encoder = PromptEncoder( embed_dim=self.sam_prompt_embed_dim, image_embedding_size=( @@ -425,8 +422,8 @@ class SAM2Model(torch.nn.Module): low_res_multimasks: Tensor of shape (B, M, H*4, W*4) with SAM output mask logits. high_res_multimasks: Tensor of shape (B, M, H*16, W*16) with upsampled mask logits. 
ious: Tensor of shape (B, M) with estimated IoU for each output mask. - low_res_masks: Tensor of shape (B, 1, H*4, W*4) with best low-resolution mask. - high_res_masks: Tensor of shape (B, 1, H*16, W*16) with best high-resolution mask. + low_res_masks: Tensor of shape (B, 1, H*4, W*4) with the best low-resolution mask. + high_res_masks: Tensor of shape (B, 1, H*16, W*16) with the best high-resolution mask. obj_ptr: Tensor of shape (B, C) with object pointer vector for the output mask. object_score_logits: Tensor of shape (B,) with object score logits. @@ -488,12 +485,7 @@ class SAM2Model(torch.nn.Module): boxes=None, masks=sam_mask_prompt, ) - ( - low_res_multimasks, - ious, - sam_output_tokens, - object_score_logits, - ) = self.sam_mask_decoder( + low_res_multimasks, ious, sam_output_tokens, object_score_logits = self.sam_mask_decoder( image_embeddings=backbone_features, image_pe=self.sam_prompt_encoder.get_dense_pe(), sparse_prompt_embeddings=sparse_embeddings, @@ -505,13 +497,8 @@ class SAM2Model(torch.nn.Module): if self.pred_obj_scores: is_obj_appearing = object_score_logits > 0 - # Mask used for spatial memories is always a *hard* choice between obj and no obj, - # consistent with the actual mask prediction - low_res_multimasks = torch.where( - is_obj_appearing[:, None, None], - low_res_multimasks, - NO_OBJ_SCORE, - ) + # Spatial memory mask is a *hard* choice between obj and no obj, consistent with actual mask prediction + low_res_multimasks = torch.where(is_obj_appearing[:, None, None], low_res_multimasks, NO_OBJ_SCORE) # convert masks from possibly bfloat16 (or float16) to float32 # (older PyTorch versions before 2.1 don't support `interpolate` on bf16) @@ -617,7 +604,6 @@ class SAM2Model(torch.nn.Module): def _prepare_backbone_features(self, backbone_out): """Prepares and flattens visual features from the image backbone output for further processing.""" - backbone_out = backbone_out.copy() assert len(backbone_out["backbone_fpn"]) == len(backbone_out["vision_pos_enc"]) assert len(backbone_out["backbone_fpn"]) >= self.num_feature_levels @@ -826,11 +812,7 @@ class SAM2Model(torch.nn.Module): mask_for_mem = mask_for_mem * self.sigmoid_scale_for_mem_enc if self.sigmoid_bias_for_mem_enc != 0.0: mask_for_mem = mask_for_mem + self.sigmoid_bias_for_mem_enc - maskmem_out = self.memory_encoder( - pix_feat, - mask_for_mem, - skip_mask_sigmoid=True, # sigmoid already applied - ) + maskmem_out = self.memory_encoder(pix_feat, mask_for_mem, skip_mask_sigmoid=True) # sigmoid already applied maskmem_features = maskmem_out["vision_features"] maskmem_pos_enc = maskmem_out["vision_pos_enc"] # add a no-object embedding to the spatial memory to indicate that the frame @@ -965,16 +947,7 @@ class SAM2Model(torch.nn.Module): track_in_reverse, prev_sam_mask_logits, ) - - ( - _, - _, - _, - low_res_masks, - high_res_masks, - obj_ptr, - object_score_logits, - ) = sam_outputs + _, _, _, low_res_masks, high_res_masks, obj_ptr, object_score_logits = sam_outputs current_out["pred_masks"] = low_res_masks current_out["pred_masks_high_res"] = high_res_masks @@ -984,8 +957,7 @@ class SAM2Model(torch.nn.Module): # it's mainly used in the demo to encode spatial memories w/ consolidated masks) current_out["object_score_logits"] = object_score_logits - # Finally run the memory encoder on the predicted mask to encode - # it into a new memory feature (that can be used in future frames) + # Run memory encoder on the predicted mask to encode it into a new memory feature (for use in future frames) 
self._encode_memory_in_output( current_vision_feats, feat_sizes, @@ -1007,8 +979,9 @@ class SAM2Model(torch.nn.Module): and (self.multimask_min_pt_num <= num_pts <= self.multimask_max_pt_num) ) - def _apply_non_overlapping_constraints(self, pred_masks): - """Applies non-overlapping constraints to masks, keeping highest scoring object per location.""" + @staticmethod + def _apply_non_overlapping_constraints(pred_masks): + """Applies non-overlapping constraints to masks, keeping the highest scoring object per location.""" batch_size = pred_masks.size(0) if batch_size == 1: return pred_masks @@ -1024,6 +997,10 @@ class SAM2Model(torch.nn.Module): pred_masks = torch.where(keep, pred_masks, torch.clamp(pred_masks, max=-10.0)) return pred_masks + def set_binarize(self, binarize=False): + """Set binarize for VideoPredictor.""" + self.binarize_mask_from_pts_for_mem_enc = binarize + def set_imgsz(self, imgsz): """ Set image size to make model compatible with different image sizes. diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py index a83159080..540d1007a 100644 --- a/ultralytics/models/sam/predict.py +++ b/ultralytics/models/sam/predict.py @@ -8,6 +8,8 @@ using SAM. It forms an integral part of the Ultralytics framework and is designe segmentation tasks. """ +from collections import OrderedDict + import numpy as np import torch import torch.nn.functional as F @@ -16,7 +18,7 @@ from ultralytics.data.augment import LetterBox from ultralytics.engine.predictor import BasePredictor from ultralytics.engine.results import Results from ultralytics.utils import DEFAULT_CFG, ops -from ultralytics.utils.torch_utils import select_device +from ultralytics.utils.torch_utils import select_device, smart_inference_mode from .amg import ( batch_iterator, @@ -95,7 +97,7 @@ class Predictor(BasePredictor): """ if overrides is None: overrides = {} - overrides.update(dict(task="segment", mode="predict")) + overrides.update(dict(task="segment", mode="predict", batch=1)) super().__init__(cfg, overrides, _callbacks) self.args.retina_masks = True self.im = None @@ -114,7 +116,7 @@ class Predictor(BasePredictor): im (torch.Tensor | List[np.ndarray]): Input image(s) in BCHW tensor format or list of HWC numpy arrays. Returns: - (torch.Tensor): The preprocessed image tensor, normalized and converted to the appropriate dtype. + im (torch.Tensor): The preprocessed image tensor, normalized and converted to the appropriate dtype. Examples: >>> predictor = Predictor() @@ -181,10 +183,9 @@ class Predictor(BasePredictor): **kwargs (Any): Additional keyword arguments. Returns: - (tuple): Contains the following three elements: - - np.ndarray: The output masks in shape (C, H, W), where C is the number of generated masks. - - np.ndarray: An array of length C containing quality scores predicted by the model for each mask. - - np.ndarray: Low-resolution logits of shape (C, H, W) for subsequent inference, where H=W=256. + (np.ndarray): The output masks in shape (C, H, W), where C is the number of generated masks. + (np.ndarray): An array of length C containing quality scores predicted by the model for each mask. + (np.ndarray): Low-resolution logits of shape (C, H, W) for subsequent inference, where H=W=256. Examples: >>> predictor = Predictor() @@ -222,10 +223,8 @@ class Predictor(BasePredictor): AssertionError: If the number of points don't match the number of labels, in case labels were passed. 
Returns: - (tuple): Tuple containing: - - np.ndarray: Output masks with shape (C, H, W), where C is the number of generated masks. - - np.ndarray: Quality scores predicted by the model for each mask, with length C. - - np.ndarray: Low-resolution logits with shape (C, H, W) for subsequent inference, where H=W=256. + (np.ndarray): Output masks with shape (C, H, W), where C is the number of generated masks. + (np.ndarray): Quality scores predicted by the model for each mask, with length C. Examples: >>> predictor = Predictor() @@ -329,10 +328,9 @@ class Predictor(BasePredictor): crop_nms_thresh (float): IoU cutoff for NMS to remove duplicate masks between crops. Returns: - (Tuple[torch.Tensor, torch.Tensor, torch.Tensor]): A tuple containing: - - pred_masks (torch.Tensor): Segmented masks with shape (N, H, W). - - pred_scores (torch.Tensor): Confidence scores for each mask with shape (N,). - - pred_bboxes (torch.Tensor): Bounding boxes for each mask with shape (N, 4). + pred_masks (torch.Tensor): Segmented masks with shape (N, H, W). + pred_scores (torch.Tensor): Confidence scores for each mask with shape (N,). + pred_bboxes (torch.Tensor): Bounding boxes for each mask with shape (N, 4). Examples: >>> predictor = Predictor() @@ -408,7 +406,7 @@ class Predictor(BasePredictor): return pred_masks, pred_scores, pred_bboxes - def setup_model(self, model, verbose=True): + def setup_model(self, model=None, verbose=True): """ Initializes the Segment Anything Model (SAM) for inference. @@ -416,7 +414,7 @@ class Predictor(BasePredictor): parameters for image normalization and other Ultralytics compatibility settings. Args: - model (torch.nn.Module): A pre-trained SAM model. If None, a model will be built based on configuration. + model (torch.nn.Module | None): A pretrained SAM model. If None, a new model is built based on config. verbose (bool): If True, prints selected device information. Examples: @@ -459,7 +457,7 @@ class Predictor(BasePredictor): orig_imgs (List[np.ndarray] | torch.Tensor): The original, unprocessed images. Returns: - (List[Results]): List of Results objects containing detection masks, bounding boxes, and other + results (List[Results]): List of Results objects containing detection masks, bounding boxes, and other metadata for each processed image. Examples: @@ -586,9 +584,8 @@ class Predictor(BasePredictor): nms_thresh (float): IoU threshold for the NMS algorithm to remove duplicate boxes. Returns: - (tuple): - - new_masks (torch.Tensor): Processed masks with small regions removed, shape (N, H, W). - - keep (List[int]): Indices of remaining masks after NMS, for filtering corresponding boxes. + new_masks (torch.Tensor): Processed masks with small regions removed, shape (N, H, W). + keep (List[int]): Indices of remaining masks after NMS, for filtering corresponding boxes. Examples: >>> masks = torch.rand(5, 640, 640) > 0.5 # 5 random binary masks @@ -690,10 +687,8 @@ class SAM2Predictor(Predictor): img_idx (int): Index of the image in the batch to process. Returns: - (tuple): Tuple containing: - - np.ndarray: Output masks with shape (C, H, W), where C is the number of generated masks. - - np.ndarray: Quality scores for each mask, with length C. - - np.ndarray: Low-resolution logits with shape (C, 256, 256) for subsequent inference. + (np.ndarray): Output masks with shape (C, H, W), where C is the number of generated masks. + (np.ndarray): Quality scores for each mask, with length C. 
Examples: >>> predictor = SAM2Predictor(cfg) @@ -712,7 +707,7 @@ class SAM2Predictor(Predictor): """ features = self.get_im_features(im) if self.features is None else self.features - bboxes, points, labels, masks = self._prepare_prompts(im.shape[2:], bboxes, points, labels, masks) + points, labels, masks = self._prepare_prompts(im.shape[2:], bboxes, points, labels, masks) points = (points, labels) if points is not None else None sparse_embeddings, dense_embeddings = self.model.sam_prompt_encoder( @@ -751,7 +746,7 @@ class SAM2Predictor(Predictor): AssertionError: If the number of points don't match the number of labels, in case labels were passed. Returns: - (tuple): A tuple containing transformed bounding boxes, points, labels, and masks. + (tuple): A tuple containing transformed points, labels, and masks. """ bboxes, points, labels, masks = super()._prepare_prompts(dst_shape, bboxes, points, labels, masks) if bboxes is not None: @@ -764,7 +759,7 @@ class SAM2Predictor(Predictor): labels = torch.cat([bbox_labels, labels], dim=1) else: points, labels = bboxes, bbox_labels - return bboxes, points, labels, masks + return points, labels, masks def set_image(self, image): """ @@ -815,3 +810,797 @@ class SAM2Predictor(Predictor): for feat, feat_size in zip(vision_feats[::-1], self._bb_feat_sizes[::-1]) ][::-1] return {"image_embed": feats[-1], "high_res_feats": feats[:-1]} + + +class SAM2VideoPredictor(SAM2Predictor): + """ + SAM2VideoPredictor to handle user interactions with videos and manage inference states. + + This class extends the functionality of SAM2Predictor to support video processing and maintains + the state of inference operations. It includes configurations for managing non-overlapping masks, + clearing memory for non-conditional inputs, and setting up callbacks for prediction events. + + Attributes: + inference_state (Dict): A dictionary to store the current state of inference operations. + non_overlap_masks (bool): A flag indicating whether masks should be non-overlapping. + clear_non_cond_mem_around_input (bool): A flag to control clearing non-conditional memory around inputs. + clear_non_cond_mem_for_multi_obj (bool): A flag to control clearing non-conditional memory for multi-object scenarios. + callbacks (Dict): A dictionary of callbacks for various prediction lifecycle events. + + Args: + cfg (Dict, Optional): Configuration settings for the predictor. Defaults to DEFAULT_CFG. + overrides (Dict, Optional): Additional configuration overrides. Defaults to None. + _callbacks (List, Optional): Custom callbacks to be added. Defaults to None. + + Note: + The `fill_hole_area` attribute is defined but not used in the current implementation. + """ + + # fill_hole_area = 8 # not used + + def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): + """ + Initialize the predictor with configuration and optional overrides. + + This constructor initializes the SAM2VideoPredictor with a given configuration, applies any + specified overrides, and sets up the inference state along with certain flags + that control the behavior of the predictor. + + Args: + cfg (Dict): Configuration dictionary containing default settings. + overrides (Dict | None): Dictionary of values to override default configuration. + _callbacks (Dict | None): Dictionary of callback functions to customize behavior. 
+ + Examples: + >>> predictor = SAM2VideoPredictor(cfg=DEFAULT_CFG) + >>> predictor = SAM2VideoPredictor(overrides={"imgsz": 640}) + >>> predictor = SAM2VideoPredictor(_callbacks={"on_predict_start": custom_callback}) + """ + super().__init__(cfg, overrides, _callbacks) + self.inference_state = {} + self.non_overlap_masks = True + self.clear_non_cond_mem_around_input = False + self.clear_non_cond_mem_for_multi_obj = False + self.callbacks["on_predict_start"].append(self.init_state) + + def get_model(self): + """ + Retrieves and configures the model with binarization enabled. + + Note: + This method overrides the base class implementation to set the binarize flag to True. + """ + model = super().get_model() + model.set_binarize(True) + return model + + def inference(self, im, bboxes=None, points=None, labels=None, masks=None): + """ + Perform image segmentation inference based on the given input cues, using the currently loaded image. This + method leverages SAM's (Segment Anything Model) architecture consisting of image encoder, prompt encoder, and + mask decoder for real-time and promptable segmentation tasks. + + Args: + im (torch.Tensor): The preprocessed input image in tensor format, with shape (N, C, H, W). + bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format. + points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixels. + labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background. + masks (np.ndarray, optional): Low-resolution masks from previous predictions shape (N,H,W). For SAM H=W=256. + + Returns: + (np.ndarray): The output masks in shape CxHxW, where C is the number of generated masks. + (np.ndarray): An array of length C containing quality scores predicted by the model for each mask. 
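+
+        Examples:
+            A minimal sketch; the prompt values are illustrative and assume the model and video source are already set up:
+            >>> predictor = SAM2VideoPredictor(cfg=DEFAULT_CFG)
+            >>> masks, scores = predictor.inference(im, points=[[500, 375]], labels=[1])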
+ """ + # Override prompts if any stored in self.prompts + bboxes = self.prompts.pop("bboxes", bboxes) + points = self.prompts.pop("points", points) + masks = self.prompts.pop("masks", masks) + + frame = self.dataset.frame + self.inference_state["im"] = im + output_dict = self.inference_state["output_dict"] + if len(output_dict["cond_frame_outputs"]) == 0: # initialize prompts + points, labels, masks = self._prepare_prompts(im.shape[2:], bboxes, points, labels, masks) + if points is not None: + for i in range(len(points)): + self.add_new_prompts(obj_id=i, points=points[[i]], labels=labels[[i]], frame_idx=frame) + elif masks is not None: + for i in range(len(masks)): + self.add_new_prompts(obj_id=i, masks=masks[[i]], frame_idx=frame) + self.propagate_in_video_preflight() + + consolidated_frame_inds = self.inference_state["consolidated_frame_inds"] + batch_size = len(self.inference_state["obj_idx_to_id"]) + if len(output_dict["cond_frame_outputs"]) == 0: + raise RuntimeError("No points are provided; please add points first") + + if frame in consolidated_frame_inds["cond_frame_outputs"]: + storage_key = "cond_frame_outputs" + current_out = output_dict[storage_key][frame] + if self.clear_non_cond_mem_around_input and (self.clear_non_cond_mem_for_multi_obj or batch_size <= 1): + # clear non-conditioning memory of the surrounding frames + self._clear_non_cond_mem_around_input(frame) + elif frame in consolidated_frame_inds["non_cond_frame_outputs"]: + storage_key = "non_cond_frame_outputs" + current_out = output_dict[storage_key][frame] + else: + storage_key = "non_cond_frame_outputs" + current_out = self._run_single_frame_inference( + output_dict=output_dict, + frame_idx=frame, + batch_size=batch_size, + is_init_cond_frame=False, + point_inputs=None, + mask_inputs=None, + reverse=False, + run_mem_encoder=True, + ) + output_dict[storage_key][frame] = current_out + # Create slices of per-object outputs for subsequent interaction with each + # individual object after tracking. + self._add_output_per_object(frame, current_out, storage_key) + self.inference_state["frames_already_tracked"].append(frame) + pred_masks = current_out["pred_masks"].flatten(0, 1) + pred_masks = pred_masks[(pred_masks > self.model.mask_threshold).sum((1, 2)) > 0] # filter blank masks + + return pred_masks, torch.ones(len(pred_masks), dtype=pred_masks.dtype, device=pred_masks.device) + + def postprocess(self, preds, img, orig_imgs): + """ + Post-processes the predictions to apply non-overlapping constraints if required. + + This method extends the post-processing functionality by applying non-overlapping constraints + to the predicted masks if the `non_overlap_masks` flag is set to True. This ensures that + the masks do not overlap, which can be useful for certain applications. + + Args: + preds (Tuple[torch.Tensor]): The predictions from the model. + img (torch.Tensor): The processed image tensor. + orig_imgs (List[np.ndarray]): The original images before processing. + + Returns: + results (list): The post-processed predictions. + + Note: + If `non_overlap_masks` is True, the method applies constraints to ensure non-overlapping masks. 
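+
+        Examples:
+            Illustrative only; `preds`, `img` and `orig_imgs` are assumed to come from a preceding `inference` call:
+            >>> results = predictor.postprocess(preds, img, orig_imgs)
+            >>> masks = results[0].masks  # masks with non-overlapping constraints applied when enabled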
+ """ + results = super().postprocess(preds, img, orig_imgs) + if self.non_overlap_masks: + for result in results: + if result.masks is None or len(result.masks) == 0: + continue + result.masks.data = self.model._apply_non_overlapping_constraints(result.masks.data.unsqueeze(0))[0] + return results + + @smart_inference_mode() + def add_new_prompts( + self, + obj_id, + points=None, + labels=None, + masks=None, + frame_idx=0, + ): + """ + Adds new points or masks to a specific frame for a given object ID. + + This method updates the inference state with new prompts (points or masks) for a specified + object and frame index. It ensures that the prompts are either points or masks, but not both, + and updates the internal state accordingly. It also handles the generation of new segmentations + based on the provided prompts and the existing state. + + Args: + obj_id (int): The ID of the object to which the prompts are associated. + points (torch.Tensor, Optional): The coordinates of the points of interest. Defaults to None. + labels (torch.Tensor, Optional): The labels corresponding to the points. Defaults to None. + masks (torch.Tensor, optional): Binary masks for the object. Defaults to None. + frame_idx (int, optional): The index of the frame to which the prompts are applied. Defaults to 0. + + Returns: + (tuple): A tuple containing the flattened predicted masks and a tensor of ones indicating the number of objects. + + Raises: + AssertionError: If both `masks` and `points` are provided, or neither is provided. + + Note: + - Only one type of prompt (either points or masks) can be added per call. + - If the frame is being tracked for the first time, it is treated as an initial conditioning frame. + - The method handles the consolidation of outputs and resizing of masks to the original video resolution. + """ + assert (masks is None) ^ (points is None), "'masks' and 'points' prompts are not compatible with each other." + obj_idx = self._obj_id_to_idx(obj_id) + + point_inputs = None + pop_key = "point_inputs_per_obj" + if points is not None: + point_inputs = {"point_coords": points, "point_labels": labels} + self.inference_state["point_inputs_per_obj"][obj_idx][frame_idx] = point_inputs + pop_key = "mask_inputs_per_obj" + self.inference_state["mask_inputs_per_obj"][obj_idx][frame_idx] = masks + self.inference_state[pop_key][obj_idx].pop(frame_idx, None) + # If this frame hasn't been tracked before, we treat it as an initial conditioning + # frame, meaning that the inputs points are to generate segments on this frame without + # using any memory from other frames, like in SAM. Otherwise (if it has been tracked), + # the input points will be used to correct the already tracked masks. + is_init_cond_frame = frame_idx not in self.inference_state["frames_already_tracked"] + obj_output_dict = self.inference_state["output_dict_per_obj"][obj_idx] + obj_temp_output_dict = self.inference_state["temp_output_dict_per_obj"][obj_idx] + # Add a frame to conditioning output if it's an initial conditioning frame or + # if the model sees all frames receiving clicks/mask as conditioning frames. + is_cond = is_init_cond_frame or self.model.add_all_frames_to_correct_as_cond + storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs" + + # Get any previously predicted mask logits on this object and feed it along with + # the new clicks into the SAM mask decoder. 
+ prev_sam_mask_logits = None + # lookup temporary output dict first, which contains the most recent output + # (if not found, then lookup conditioning and non-conditioning frame output) + if point_inputs is not None: + prev_out = ( + obj_temp_output_dict[storage_key].get(frame_idx) + or obj_output_dict["cond_frame_outputs"].get(frame_idx) + or obj_output_dict["non_cond_frame_outputs"].get(frame_idx) + ) + + if prev_out is not None and prev_out.get("pred_masks") is not None: + prev_sam_mask_logits = prev_out["pred_masks"].to(device=self.device, non_blocking=True) + # Clamp the scale of prev_sam_mask_logits to avoid rare numerical issues. + prev_sam_mask_logits.clamp_(-32.0, 32.0) + current_out = self._run_single_frame_inference( + output_dict=obj_output_dict, # run on the slice of a single object + frame_idx=frame_idx, + batch_size=1, # run on the slice of a single object + is_init_cond_frame=is_init_cond_frame, + point_inputs=point_inputs, + mask_inputs=masks, + reverse=False, + # Skip the memory encoder when adding clicks or mask. We execute the memory encoder + # at the beginning of `propagate_in_video` (after user finalize their clicks). This + # allows us to enforce non-overlapping constraints on all objects before encoding + # them into memory. + run_mem_encoder=False, + prev_sam_mask_logits=prev_sam_mask_logits, + ) + # Add the output to the output dict (to be used as future memory) + obj_temp_output_dict[storage_key][frame_idx] = current_out + + # Resize the output mask to the original video resolution + consolidated_out = self._consolidate_temp_output_across_obj( + frame_idx, + is_cond=is_cond, + run_mem_encoder=False, + ) + pred_masks = consolidated_out["pred_masks"].flatten(0, 1) + return pred_masks.flatten(0, 1), torch.ones(1, dtype=pred_masks.dtype, device=pred_masks.device) + + @smart_inference_mode() + def propagate_in_video_preflight(self): + """ + Prepare inference_state and consolidate temporary outputs before tracking. + + This method marks the start of tracking, disallowing the addition of new objects until the session is reset. + It consolidates temporary outputs from `temp_output_dict_per_obj` and merges them into `output_dict`. + Additionally, it clears non-conditioning memory around input frames and ensures that the state is consistent + with the provided inputs. + """ + # Tracking has started and we don't allow adding new objects until session is reset. + self.inference_state["tracking_has_started"] = True + batch_size = len(self.inference_state["obj_idx_to_id"]) + + # Consolidate per-object temporary outputs in "temp_output_dict_per_obj" and + # add them into "output_dict". + temp_output_dict_per_obj = self.inference_state["temp_output_dict_per_obj"] + output_dict = self.inference_state["output_dict"] + # "consolidated_frame_inds" contains indices of those frames where consolidated + # temporary outputs have been added (either in this call or any previous calls + # to `propagate_in_video_preflight`). 
+ consolidated_frame_inds = self.inference_state["consolidated_frame_inds"] + for is_cond in {False, True}: + # Separately consolidate conditioning and non-conditioning temp outputs + storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs" + # Find all the frames that contain temporary outputs for any objects + # (these should be the frames that have just received clicks or mask inputs + # via `add_new_points` or `add_new_mask`) + temp_frame_inds = set() + for obj_temp_output_dict in temp_output_dict_per_obj.values(): + temp_frame_inds.update(obj_temp_output_dict[storage_key].keys()) + consolidated_frame_inds[storage_key].update(temp_frame_inds) + # consolidate the temporary output across all objects on this frame + for frame_idx in temp_frame_inds: + consolidated_out = self._consolidate_temp_output_across_obj( + frame_idx, is_cond=is_cond, run_mem_encoder=True + ) + # merge them into "output_dict" and also create per-object slices + output_dict[storage_key][frame_idx] = consolidated_out + self._add_output_per_object(frame_idx, consolidated_out, storage_key) + if self.clear_non_cond_mem_around_input and (self.clear_non_cond_mem_for_multi_obj or batch_size <= 1): + # clear non-conditioning memory of the surrounding frames + self._clear_non_cond_mem_around_input(frame_idx) + + # clear temporary outputs in `temp_output_dict_per_obj` + for obj_temp_output_dict in temp_output_dict_per_obj.values(): + obj_temp_output_dict[storage_key].clear() + + # edge case: if an output is added to "cond_frame_outputs", we remove any prior + # output on the same frame in "non_cond_frame_outputs" + for frame_idx in output_dict["cond_frame_outputs"]: + output_dict["non_cond_frame_outputs"].pop(frame_idx, None) + for obj_output_dict in self.inference_state["output_dict_per_obj"].values(): + for frame_idx in obj_output_dict["cond_frame_outputs"]: + obj_output_dict["non_cond_frame_outputs"].pop(frame_idx, None) + for frame_idx in consolidated_frame_inds["cond_frame_outputs"]: + assert frame_idx in output_dict["cond_frame_outputs"] + consolidated_frame_inds["non_cond_frame_outputs"].discard(frame_idx) + + # Make sure that the frame indices in "consolidated_frame_inds" are exactly those frames + # with either points or mask inputs (which should be true under a correct workflow). + all_consolidated_frame_inds = ( + consolidated_frame_inds["cond_frame_outputs"] | consolidated_frame_inds["non_cond_frame_outputs"] + ) + input_frames_inds = set() + for point_inputs_per_frame in self.inference_state["point_inputs_per_obj"].values(): + input_frames_inds.update(point_inputs_per_frame.keys()) + for mask_inputs_per_frame in self.inference_state["mask_inputs_per_obj"].values(): + input_frames_inds.update(mask_inputs_per_frame.keys()) + assert all_consolidated_frame_inds == input_frames_inds + + @staticmethod + def init_state(predictor): + """ + Initialize an inference state for the predictor. + + This function sets up the initial state required for performing inference on video data. + It includes initializing various dictionaries and ordered dictionaries that will store + inputs, outputs, and other metadata relevant to the tracking process. + + Args: + predictor (SAM2VideoPredictor): The predictor object for which to initialize the state.
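+
+        Examples:
+            Illustrative only; assumes a video source has already been attached via `setup_source`:
+            >>> predictor.setup_source("path/to/video.mp4")
+            >>> SAM2VideoPredictor.init_state(predictor)  # returns early if the state is already initialized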
+ """ + if len(predictor.inference_state) > 0: # means initialized + return + assert predictor.dataset is not None + assert predictor.dataset.mode == "video" + + inference_state = {} + inference_state["num_frames"] = predictor.dataset.frames + # inputs on each frame + inference_state["point_inputs_per_obj"] = {} + inference_state["mask_inputs_per_obj"] = {} + # values that don't change across frames (so we only need to hold one copy of them) + inference_state["constants"] = {} + # mapping between client-side object id and model-side object index + inference_state["obj_id_to_idx"] = OrderedDict() + inference_state["obj_idx_to_id"] = OrderedDict() + inference_state["obj_ids"] = [] + # A storage to hold the model's tracking results and states on each frame + inference_state["output_dict"] = { + "cond_frame_outputs": {}, # dict containing {frame_idx: } + "non_cond_frame_outputs": {}, # dict containing {frame_idx: } + } + # Slice (view) of each object's tracking results, sharing the same memory with "output_dict" + inference_state["output_dict_per_obj"] = {} + # A temporary storage to hold new outputs when the user interacts with a frame + # to add clicks or a mask (it's merged into "output_dict" before propagation starts) + inference_state["temp_output_dict_per_obj"] = {} + # Frames that already hold consolidated outputs from click or mask inputs + # (we directly use their consolidated outputs during tracking) + inference_state["consolidated_frame_inds"] = { + "cond_frame_outputs": set(), # set containing frame indices + "non_cond_frame_outputs": set(), # set containing frame indices + } + # metadata for each tracking frame (e.g. which direction it's tracked) + inference_state["tracking_has_started"] = False + inference_state["frames_already_tracked"] = [] + predictor.inference_state = inference_state + + def get_im_features(self, im, batch=1): + """ + Extracts and processes image features using SAM2's image encoder for subsequent segmentation tasks. + + Args: + im (torch.Tensor): The input image tensor. + batch (int, optional): The batch size for expanding features if there are multiple prompts. Defaults to 1. + + Returns: + vis_feats (torch.Tensor): The visual features extracted from the image. + vis_pos_embed (torch.Tensor): The positional embeddings for the visual features. + feat_sizes (List[Tuple[int]]): A list containing the sizes of the extracted features. + + Note: + - If `batch` is greater than 1, the features are expanded to fit the batch size. + - The method leverages the model's `_prepare_backbone_features` method to prepare the backbone features. + """ + backbone_out = self.model.forward_image(im) + if batch > 1: # expand features if there's more than one prompt + for i, feat in enumerate(backbone_out["backbone_fpn"]): + backbone_out["backbone_fpn"][i] = feat.expand(batch, -1, -1, -1) + for i, pos in enumerate(backbone_out["vision_pos_enc"]): + pos = pos.expand(batch, -1, -1, -1) + backbone_out["vision_pos_enc"][i] = pos + _, vis_feats, vis_pos_embed, feat_sizes = self.model._prepare_backbone_features(backbone_out) + return vis_feats, vis_pos_embed, feat_sizes + + def _obj_id_to_idx(self, obj_id): + """ + Map client-side object id to model-side object index. + + Args: + obj_id (int): The unique identifier of the object provided by the client side. + + Returns: + obj_idx (int): The index of the object on the model side. + + Raises: + RuntimeError: If an attempt is made to add a new object after tracking has started.
+ + Note: + - The method updates or retrieves mappings between object IDs and indices stored in + `inference_state`. + - It ensures that new objects can only be added before tracking commences. + - It maintains two-way mappings between IDs and indices (`obj_id_to_idx` and `obj_idx_to_id`). + - Additional data structures are initialized for the new object to store inputs and outputs. + """ + obj_idx = self.inference_state["obj_id_to_idx"].get(obj_id, None) + if obj_idx is not None: + return obj_idx + + # This is a new object id not sent to the server before. We only allow adding + # new objects *before* the tracking starts. + allow_new_object = not self.inference_state["tracking_has_started"] + if allow_new_object: + # get the next object slot + obj_idx = len(self.inference_state["obj_id_to_idx"]) + self.inference_state["obj_id_to_idx"][obj_id] = obj_idx + self.inference_state["obj_idx_to_id"][obj_idx] = obj_id + self.inference_state["obj_ids"] = list(self.inference_state["obj_id_to_idx"]) + # set up input and output structures for this object + self.inference_state["point_inputs_per_obj"][obj_idx] = {} + self.inference_state["mask_inputs_per_obj"][obj_idx] = {} + self.inference_state["output_dict_per_obj"][obj_idx] = { + "cond_frame_outputs": {}, # dict containing {frame_idx: } + "non_cond_frame_outputs": {}, # dict containing {frame_idx: } + } + self.inference_state["temp_output_dict_per_obj"][obj_idx] = { + "cond_frame_outputs": {}, # dict containing {frame_idx: } + "non_cond_frame_outputs": {}, # dict containing {frame_idx: } + } + return obj_idx + else: + raise RuntimeError( + f"Cannot add new object id {obj_id} after tracking starts. " + f"All existing object ids: {self.inference_state['obj_ids']}. " + f"Please call 'reset_state' to restart from scratch." + ) + + def _run_single_frame_inference( + self, + output_dict, + frame_idx, + batch_size, + is_init_cond_frame, + point_inputs, + mask_inputs, + reverse, + run_mem_encoder, + prev_sam_mask_logits=None, + ): + """ + Run tracking on a single frame based on current inputs and previous memory. + + Args: + output_dict (Dict): The dictionary containing the output states of the tracking process. + frame_idx (int): The index of the current frame. + batch_size (int): The batch size for processing the frame. + is_init_cond_frame (bool): Indicates if the current frame is an initial conditioning frame. + point_inputs (Dict, Optional): Input points and their labels. Defaults to None. + mask_inputs (torch.Tensor, Optional): Input binary masks. Defaults to None. + reverse (bool): Indicates if the tracking should be performed in reverse order. + run_mem_encoder (bool): Indicates if the memory encoder should be executed. + prev_sam_mask_logits (torch.Tensor, Optional): Previous mask logits for the current object. Defaults to None. + + Returns: + current_out (dict): A dictionary containing the output of the tracking step, including updated features and predictions. + + Raises: + AssertionError: If both `point_inputs` and `mask_inputs` are provided, or neither is provided. + + Note: + - The method assumes that `point_inputs` and `mask_inputs` are mutually exclusive. + - The method retrieves image features using the `get_im_features` method. + - The `maskmem_pos_enc` is assumed to be constant across frames, hence only one copy is stored. + - The `fill_holes_in_mask_scores` function is commented out and currently unsupported due to CUDA extension requirements. 
+ """ + # Retrieve correct image features + current_vision_feats, current_vision_pos_embeds, feat_sizes = self.get_im_features( + self.inference_state["im"], batch_size + ) + + # point and mask should not appear as input simultaneously on the same frame + assert point_inputs is None or mask_inputs is None + current_out = self.model.track_step( + frame_idx=frame_idx, + is_init_cond_frame=is_init_cond_frame, + current_vision_feats=current_vision_feats, + current_vision_pos_embeds=current_vision_pos_embeds, + feat_sizes=feat_sizes, + point_inputs=point_inputs, + mask_inputs=mask_inputs, + output_dict=output_dict, + num_frames=self.inference_state["num_frames"], + track_in_reverse=reverse, + run_mem_encoder=run_mem_encoder, + prev_sam_mask_logits=prev_sam_mask_logits, + ) + + maskmem_features = current_out["maskmem_features"] + if maskmem_features is not None: + current_out["maskmem_features"] = maskmem_features.to( + dtype=torch.float16, device=self.device, non_blocking=True + ) + # NOTE: Do not support the `fill_holes_in_mask_scores` function since it needs cuda extensions + # potentially fill holes in the predicted masks + # if self.fill_hole_area > 0: + # pred_masks = current_out["pred_masks"].to(self.device, non_blocking=True) + # pred_masks = fill_holes_in_mask_scores(pred_masks, self.fill_hole_area) + + # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it + current_out["maskmem_pos_enc"] = self._get_maskmem_pos_enc(current_out["maskmem_pos_enc"]) + return current_out + + def _get_maskmem_pos_enc(self, out_maskmem_pos_enc): + """ + Caches and manages the positional encoding for mask memory across frames and objects. + + This method optimizes storage by caching the positional encoding (`maskmem_pos_enc`) for + mask memory, which is constant across frames and objects, thus reducing the amount of + redundant information stored during an inference session. It checks if the positional + encoding has already been cached; if not, it caches a slice of the provided encoding. + If the batch size is greater than one, it expands the cached positional encoding to match + the current batch size. + + Args: + out_maskmem_pos_enc (List[torch.Tensor] or None): The positional encoding for mask memory. + Should be a list of tensors or None. + + Returns: + out_maskmem_pos_enc (List[torch.Tensor]): The positional encoding for mask memory, either cached or expanded. + + Note: + - The method assumes that `out_maskmem_pos_enc` is a list of tensors or None. + - Only a single object's slice is cached since the encoding is the same across objects. + - The method checks if the positional encoding has already been cached in the session's constants. + - If the batch size is greater than one, the cached encoding is expanded to fit the batch size. 
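+
+        Examples:
+            Illustrative only; tensor shapes are arbitrary and the constants cache is assumed to be empty:
+            >>> enc = [torch.zeros(2, 64, 64, 64)]
+            >>> out = predictor._get_maskmem_pos_enc(enc)  # caches a single-object slice, then expands it to batch size 2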
+ """ + model_constants = self.inference_state["constants"] + # "out_maskmem_pos_enc" should be either a list of tensors or None + if out_maskmem_pos_enc is not None: + if "maskmem_pos_enc" not in model_constants: + assert isinstance(out_maskmem_pos_enc, list) + # only take the slice for one object, since it's same across objects + maskmem_pos_enc = [x[0:1].clone() for x in out_maskmem_pos_enc] + model_constants["maskmem_pos_enc"] = maskmem_pos_enc + else: + maskmem_pos_enc = model_constants["maskmem_pos_enc"] + # expand the cached maskmem_pos_enc to the actual batch size + batch_size = out_maskmem_pos_enc[0].size(0) + if batch_size > 1: + out_maskmem_pos_enc = [x.expand(batch_size, -1, -1, -1) for x in maskmem_pos_enc] + return out_maskmem_pos_enc + + def _consolidate_temp_output_across_obj( + self, + frame_idx, + is_cond=False, + run_mem_encoder=False, + ): + """ + Consolidates per-object temporary outputs into a single output for all objects. + + This method combines the temporary outputs for each object on a given frame into a unified + output. It fills in any missing objects either from the main output dictionary or leaves + placeholders if they do not exist in the main output. Optionally, it can re-run the memory + encoder after applying non-overlapping constraints to the object scores. + + Args: + frame_idx (int): The index of the frame for which to consolidate outputs. + is_cond (bool, Optional): Indicates if the frame is considered a conditioning frame. + Defaults to False. + run_mem_encoder (bool, Optional): Specifies whether to run the memory encoder after + consolidating the outputs. Defaults to False. + + Returns: + consolidated_out (dict): A consolidated output dictionary containing the combined results for all objects. + + Note: + - The method initializes the consolidated output with placeholder values for missing objects. + - It searches for outputs in both the temporary and main output dictionaries. + - If `run_mem_encoder` is True, it applies non-overlapping constraints and re-runs the memory encoder. + - The `maskmem_features` and `maskmem_pos_enc` are only populated when `run_mem_encoder` is True. + """ + batch_size = len(self.inference_state["obj_idx_to_id"]) + storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs" + + # Initialize `consolidated_out`. Its "maskmem_features" and "maskmem_pos_enc" + # will be added when rerunning the memory encoder after applying non-overlapping + # constraints to object scores. Its "pred_masks" are prefilled with a large + # negative value (NO_OBJ_SCORE) to represent missing objects. + consolidated_out = { + "maskmem_features": None, + "maskmem_pos_enc": None, + "pred_masks": torch.full( + size=(batch_size, 1, self.imgsz[0] // 4, self.imgsz[1] // 4), + fill_value=-1024.0, + dtype=torch.float32, + device=self.device, + ), + "obj_ptr": torch.full( + size=(batch_size, self.model.hidden_dim), + fill_value=-1024.0, + dtype=torch.float32, + device=self.device, + ), + "object_score_logits": torch.full( + size=(batch_size, 1), + # default to 10.0 for object_score_logits, i.e. 
assuming the object is + # present as sigmoid(10)=1, same as in `predict_masks` of `MaskDecoder` + fill_value=10.0, + dtype=torch.float32, + device=self.device, + ), + } + for obj_idx in range(batch_size): + obj_temp_output_dict = self.inference_state["temp_output_dict_per_obj"][obj_idx] + obj_output_dict = self.inference_state["output_dict_per_obj"][obj_idx] + out = ( + obj_temp_output_dict[storage_key].get(frame_idx) + # If the object doesn't appear in "temp_output_dict_per_obj" on this frame, + # we fall back and look up its previous output in "output_dict_per_obj". + # We look up both "cond_frame_outputs" and "non_cond_frame_outputs" in + # "output_dict_per_obj" to find a previous output for this object. + or obj_output_dict["cond_frame_outputs"].get(frame_idx) + or obj_output_dict["non_cond_frame_outputs"].get(frame_idx) + ) + # If the object doesn't appear in "output_dict_per_obj" either, we skip it + # and leave its mask scores to the default scores (i.e. the NO_OBJ_SCORE + # placeholder above) and set its object pointer to be a dummy pointer. + if out is None: + # Fill in dummy object pointers for those objects without any inputs or + # tracking outcomes on this frame (only do it under `run_mem_encoder=True`, + # i.e. when we need to build the memory for tracking). + if run_mem_encoder: + # fill object pointer with a dummy pointer (based on an empty mask) + consolidated_out["obj_ptr"][obj_idx : obj_idx + 1] = self._get_empty_mask_ptr(frame_idx) + continue + # Add the temporary object output mask to consolidated output mask + consolidated_out["pred_masks"][obj_idx : obj_idx + 1] = out["pred_masks"] + consolidated_out["obj_ptr"][obj_idx : obj_idx + 1] = out["obj_ptr"] + + # Optionally, apply non-overlapping constraints on the consolidated scores and rerun the memory encoder + if run_mem_encoder: + high_res_masks = F.interpolate( + consolidated_out["pred_masks"], + size=self.imgsz, + mode="bilinear", + align_corners=False, + ) + if self.model.non_overlap_masks_for_mem_enc: + high_res_masks = self.model._apply_non_overlapping_constraints(high_res_masks) + consolidated_out["maskmem_features"], consolidated_out["maskmem_pos_enc"] = self._run_memory_encoder( + batch_size=batch_size, + high_res_masks=high_res_masks, + is_mask_from_pts=True, # these frames are what the user interacted with + object_score_logits=consolidated_out["object_score_logits"], + ) + + return consolidated_out + + def _get_empty_mask_ptr(self, frame_idx): + """ + Get a dummy object pointer based on an empty mask on the current frame. + + Args: + frame_idx (int): The index of the current frame for which to generate the dummy object pointer. + + Returns: + (torch.Tensor): A tensor representing the dummy object pointer generated from the empty mask. 
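+
+        Examples:
+            Illustrative only; assumes `self.inference_state["im"]` already holds the current frame tensor:
+            >>> empty_ptr = predictor._get_empty_mask_ptr(frame_idx=0)  # dummy pointer derived from an all-zero mask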
+ """ + # Retrieve correct image features + current_vision_feats, current_vision_pos_embeds, feat_sizes = self.get_im_features(self.inference_state["im"]) + + # Feed the empty mask and image feature above to get a dummy object pointer + current_out = self.model.track_step( + frame_idx=frame_idx, + is_init_cond_frame=True, + current_vision_feats=current_vision_feats, + current_vision_pos_embeds=current_vision_pos_embeds, + feat_sizes=feat_sizes, + point_inputs=None, + # A dummy (empty) mask with a single object + mask_inputs=torch.zeros((1, 1, *self.imgsz), dtype=torch.float32, device=self.device), + output_dict={}, + num_frames=self.inference_state["num_frames"], + track_in_reverse=False, + run_mem_encoder=False, + prev_sam_mask_logits=None, + ) + return current_out["obj_ptr"] + + def _run_memory_encoder(self, batch_size, high_res_masks, object_score_logits, is_mask_from_pts): + """ + Run the memory encoder on masks. + + This is usually after applying non-overlapping constraints to object scores. Since their scores changed, their + memory also needs to be computed again with the memory encoder. + + Args: + batch_size (int): The batch size for processing the frame. + high_res_masks (torch.Tensor): High-resolution masks for which to compute the memory. + object_score_logits (torch.Tensor): Logits representing the object scores. + is_mask_from_pts (bool): Indicates if the mask is derived from point interactions. + + Returns: + (tuple[torch.Tensor, torch.Tensor]): A tuple containing the encoded mask features and positional encoding. + """ + # Retrieve correct image features + current_vision_feats, _, feat_sizes = self.get_im_features(self.inference_state["im"], batch_size) + maskmem_features, maskmem_pos_enc = self.model._encode_new_memory( + current_vision_feats=current_vision_feats, + feat_sizes=feat_sizes, + pred_masks_high_res=high_res_masks, + is_mask_from_pts=is_mask_from_pts, + object_score_logits=object_score_logits, + ) + + # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it + maskmem_pos_enc = self._get_maskmem_pos_enc(maskmem_pos_enc) + return maskmem_features.to(dtype=torch.float16, device=self.device, non_blocking=True), maskmem_pos_enc + + def _add_output_per_object(self, frame_idx, current_out, storage_key): + """ + Split a multi-object output into per-object output slices and add them into Output_Dict_Per_Obj. + + The resulting slices share the same tensor storage. + + Args: + frame_idx (int): The index of the current frame. + current_out (Dict): The current output dictionary containing multi-object outputs. + storage_key (str): The key used to store the output in the per-object output dictionary. 
+ """ + maskmem_features = current_out["maskmem_features"] + assert maskmem_features is None or isinstance(maskmem_features, torch.Tensor) + + maskmem_pos_enc = current_out["maskmem_pos_enc"] + assert maskmem_pos_enc is None or isinstance(maskmem_pos_enc, list) + + for obj_idx, obj_output_dict in self.inference_state["output_dict_per_obj"].items(): + obj_slice = slice(obj_idx, obj_idx + 1) + obj_out = { + "maskmem_features": None, + "maskmem_pos_enc": None, + "pred_masks": current_out["pred_masks"][obj_slice], + "obj_ptr": current_out["obj_ptr"][obj_slice], + } + if maskmem_features is not None: + obj_out["maskmem_features"] = maskmem_features[obj_slice] + if maskmem_pos_enc is not None: + obj_out["maskmem_pos_enc"] = [x[obj_slice] for x in maskmem_pos_enc] + obj_output_dict[storage_key][frame_idx] = obj_out + + def _clear_non_cond_mem_around_input(self, frame_idx): + """ + Remove the non-conditioning memory around the input frame. + + When users provide correction clicks, the surrounding frames' non-conditioning memories can still contain outdated + object appearance information and could confuse the model. This method clears those non-conditioning memories + surrounding the interacted frame to avoid giving the model both old and new information about the object. + + Args: + frame_idx (int): The index of the current frame where user interaction occurred. + """ + r = self.model.memory_temporal_stride_for_eval + frame_idx_begin = frame_idx - r * self.model.num_maskmem + frame_idx_end = frame_idx + r * self.model.num_maskmem + for t in range(frame_idx_begin, frame_idx_end + 1): + self.inference_state["output_dict"]["non_cond_frame_outputs"].pop(t, None) + for obj_output_dict in self.inference_state["output_dict_per_obj"].values(): + obj_output_dict["non_cond_frame_outputs"].pop(t, None) diff --git a/ultralytics/models/yolo/detect/train.py b/ultralytics/models/yolo/detect/train.py index e0dbb367f..606b9fb92 100644 --- a/ultralytics/models/yolo/detect/train.py +++ b/ultralytics/models/yolo/detect/train.py @@ -141,3 +141,10 @@ class DetectionTrainer(BaseTrainer): boxes = np.concatenate([lb["bboxes"] for lb in self.train_loader.dataset.labels], 0) cls = np.concatenate([lb["cls"] for lb in self.train_loader.dataset.labels], 0) plot_labels(boxes, cls.squeeze(), names=self.data["names"], save_dir=self.save_dir, on_plot=self.on_plot) + + def auto_batch(self): + """Get batch size by calculating memory occupation of model.""" + train_dataset = self.build_dataset(self.trainset, mode="train", batch=16) + # 4 for mosaic augmentation + max_num_obj = max(len(l["cls"]) for l in train_dataset.labels) * 4 + return super().auto_batch(max_num_obj) diff --git a/ultralytics/models/yolo/detect/val.py b/ultralytics/models/yolo/detect/val.py index 05db8cba3..31f0fdc0b 100644 --- a/ultralytics/models/yolo/detect/val.py +++ b/ultralytics/models/yolo/detect/val.py @@ -155,8 +155,8 @@ class DetectionValidator(BaseValidator): # Evaluate if nl: stat["tp"] = self._process_batch(predn, bbox, cls) - if self.args.plots: - self.confusion_matrix.process_batch(predn, bbox, cls) + if self.args.plots: + self.confusion_matrix.process_batch(predn, bbox, cls) for k in self.stats.keys(): self.stats[k].append(stat[k]) diff --git a/ultralytics/models/yolo/pose/val.py b/ultralytics/models/yolo/pose/val.py index bdf17328d..12d812a7f 100644 --- a/ultralytics/models/yolo/pose/val.py +++ b/ultralytics/models/yolo/pose/val.py @@ -138,8 +138,8 @@ class PoseValidator(DetectionValidator): if nl: stat["tp"] = self._process_batch(predn, bbox, cls) 
stat["tp_p"] = self._process_batch(predn, bbox, cls, pred_kpts, pbatch["kpts"]) - if self.args.plots: - self.confusion_matrix.process_batch(predn, bbox, cls) + if self.args.plots: + self.confusion_matrix.process_batch(predn, bbox, cls) for k in self.stats.keys(): self.stats[k].append(stat[k]) diff --git a/ultralytics/models/yolo/segment/val.py b/ultralytics/models/yolo/segment/val.py index 30b63f979..d8fe4aae9 100644 --- a/ultralytics/models/yolo/segment/val.py +++ b/ultralytics/models/yolo/segment/val.py @@ -135,8 +135,8 @@ class SegmentationValidator(DetectionValidator): stat["tp_m"] = self._process_batch( predn, bbox, cls, pred_masks, gt_masks, self.args.overlap_mask, masks=True ) - if self.args.plots: - self.confusion_matrix.process_batch(predn, bbox, cls) + if self.args.plots: + self.confusion_matrix.process_batch(predn, bbox, cls) for k in self.stats.keys(): self.stats[k].append(stat[k]) diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py index cef05a357..60b9f6389 100644 --- a/ultralytics/nn/autobackend.py +++ b/ultralytics/nn/autobackend.py @@ -123,6 +123,7 @@ class AutoBackend(nn.Module): paddle, mnn, ncnn, + imx, triton, ) = self._model_type(w) fp16 &= pt or jit or onnx or xml or engine or nn_module or triton # FP16 @@ -182,8 +183,8 @@ class AutoBackend(nn.Module): check_requirements("opencv-python>=4.5.4") net = cv2.dnn.readNetFromONNX(w) - # ONNX Runtime - elif onnx: + # ONNX Runtime and IMX + elif onnx or imx: LOGGER.info(f"Loading {w} for ONNX Runtime inference...") check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime")) if IS_RASPBERRYPI or IS_JETSON: @@ -199,7 +200,22 @@ class AutoBackend(nn.Module): device = torch.device("cpu") cuda = False LOGGER.info(f"Preferring ONNX Runtime {providers[0]}") - session = onnxruntime.InferenceSession(w, providers=providers) + if onnx: + session = onnxruntime.InferenceSession(w, providers=providers) + else: + check_requirements( + ["model-compression-toolkit==2.1.1", "sony-custom-layers[torch]==0.2.0", "onnxruntime-extensions"] + ) + w = next(Path(w).glob("*.onnx")) + LOGGER.info(f"Loading {w} for ONNX IMX inference...") + import mct_quantizers as mctq + from sony_custom_layers.pytorch.object_detection import nms_ort # noqa + + session = onnxruntime.InferenceSession( + w, mctq.get_ort_session_options(), providers=["CPUExecutionProvider"] + ) + task = "detect" + output_names = [x.name for x in session.get_outputs()] metadata = session.get_modelmeta().custom_metadata_map dynamic = isinstance(session.get_outputs()[0].shape[0], str) @@ -520,7 +536,7 @@ class AutoBackend(nn.Module): y = self.net.forward() # ONNX Runtime - elif self.onnx: + elif self.onnx or self.imx: if self.dynamic: im = im.cpu().numpy() # torch to numpy y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im}) @@ -537,6 +553,9 @@ class AutoBackend(nn.Module): ) self.session.run_with_iobinding(self.io) y = self.bindings + if self.imx: + # boxes, conf, cls + y = np.concatenate([y[0], y[1][:, :, None], y[2][:, :, None]], axis=-1) # OpenVINO elif self.xml: diff --git a/ultralytics/nn/modules/block.py b/ultralytics/nn/modules/block.py index 7208ea639..ddb21a74d 100644 --- a/ultralytics/nn/modules/block.py +++ b/ultralytics/nn/modules/block.py @@ -240,7 +240,8 @@ class C2f(nn.Module): def forward_split(self, x): """Forward pass using split() instead of chunk().""" - y = list(self.cv1(x).split((self.c, self.c), 1)) + y = self.cv1(x).split((self.c, self.c), 1) + y = [y[0], y[1]] y.extend(m(y[-1]) for m in self.m) return 
self.cv2(torch.cat(y, 1)) @@ -279,8 +280,8 @@ class RepC3(nn.Module): """Initialize CSP Bottleneck with a single convolution using input channels, output channels, and number.""" super().__init__() c_ = int(c2 * e) # hidden channels - self.cv1 = Conv(c1, c2, 1, 1) - self.cv2 = Conv(c1, c2, 1, 1) + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) self.m = nn.Sequential(*[RepConv(c_, c_) for _ in range(n)]) self.cv3 = Conv(c_, c2, 1, 1) if c_ != c2 else nn.Identity() diff --git a/ultralytics/nn/modules/conv.py b/ultralytics/nn/modules/conv.py index aaa70f574..94c5b6c6d 100644 --- a/ultralytics/nn/modules/conv.py +++ b/ultralytics/nn/modules/conv.py @@ -50,7 +50,7 @@ class Conv(nn.Module): return self.act(self.bn(self.conv(x))) def forward_fuse(self, x): - """Perform transposed convolution of 2D data.""" + """Apply convolution and activation without batch normalization.""" return self.act(self.conv(x)) diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py index 84c31709c..29a1953e4 100644 --- a/ultralytics/nn/modules/head.py +++ b/ultralytics/nn/modules/head.py @@ -23,6 +23,7 @@ class Detect(nn.Module): dynamic = False # force grid reconstruction export = False # export mode + format = None # export format end2end = False # end2end max_det = 300 # max_det shape = None @@ -101,7 +102,7 @@ class Detect(nn.Module): # Inference path shape = x[0].shape # BCHW x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2) - if self.dynamic or self.shape != shape: + if self.format != "imx" and (self.dynamic or self.shape != shape): self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5)) self.shape = shape @@ -119,6 +120,11 @@ class Detect(nn.Module): grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1) norm = self.strides / (self.stride[0] * grid_size) dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2]) + elif self.export and self.format == "imx": + dbox = self.decode_bboxes( + self.dfl(box) * self.strides, self.anchors.unsqueeze(0) * self.strides, xywh=False + ) + return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1) else: dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides @@ -137,9 +143,9 @@ class Detect(nn.Module): a[-1].bias.data[:] = 1.0 # box b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img) - def decode_bboxes(self, bboxes, anchors): + def decode_bboxes(self, bboxes, anchors, xywh=True): """Decode bounding boxes.""" - return dist2bbox(bboxes, anchors, xywh=not self.end2end, dim=1) + return dist2bbox(bboxes, anchors, xywh=xywh and (not self.end2end), dim=1) @staticmethod def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80): diff --git a/ultralytics/nn/tasks.py b/ultralytics/nn/tasks.py index 1e69a8f25..c1a24c344 100644 --- a/ultralytics/nn/tasks.py +++ b/ultralytics/nn/tasks.py @@ -960,10 +960,8 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) m = getattr(torch.nn, m[3:]) if "nn." 
in m else globals()[m] # get module for j, a in enumerate(args): if isinstance(a, str): - try: + with contextlib.suppress(ValueError): args[j] = locals()[a] if a in locals() else ast.literal_eval(a) - except ValueError: - pass n = n_ = max(round(n * depth), 1) if n > 1 else n # depth gain if m in { Classify, @@ -1141,24 +1139,16 @@ def guess_model_task(model): # Guess from model cfg if isinstance(model, dict): - try: + with contextlib.suppress(Exception): return cfg2task(model) - except Exception: - pass - # Guess from PyTorch model if isinstance(model, nn.Module): # PyTorch model for x in "model.args", "model.model.args", "model.model.model.args": - try: + with contextlib.suppress(Exception): return eval(x)["task"] - except Exception: - pass for x in "model.yaml", "model.model.yaml", "model.model.model.yaml": - try: + with contextlib.suppress(Exception): return cfg2task(eval(x)) - except Exception: - pass - for m in model.modules(): if isinstance(m, Segment): return "segment" diff --git a/ultralytics/solutions/__init__.py b/ultralytics/solutions/__init__.py index 4446c1826..9de61edce 100644 --- a/ultralytics/solutions/__init__.py +++ b/ultralytics/solutions/__init__.py @@ -7,6 +7,7 @@ from .heatmap import Heatmap from .object_counter import ObjectCounter from .parking_management import ParkingManagement, ParkingPtsSelection from .queue_management import QueueManager +from .region_counter import RegionCounter from .speed_estimation import SpeedEstimator from .streamlit_inference import inference @@ -21,4 +22,5 @@ __all__ = ( "SpeedEstimator", "Analytics", "inference", + "RegionCounter", ) diff --git a/ultralytics/solutions/analytics.py b/ultralytics/solutions/analytics.py index aed7beed9..9be192448 100644 --- a/ultralytics/solutions/analytics.py +++ b/ultralytics/solutions/analytics.py @@ -54,7 +54,7 @@ class Analytics(BaseSolution): self.y_label = "Total Counts" # Predefined data - self.bg_color = "#00F344" # background color of frame + self.bg_color = "#F3F3F3" # background color of frame self.fg_color = "#111E68" # foreground color of frame self.title = "Ultralytics Solutions" # window name self.max_points = 45 # maximum points to be drawn on window diff --git a/ultralytics/solutions/heatmap.py b/ultralytics/solutions/heatmap.py index 39352a9bd..c9dd80879 100644 --- a/ultralytics/solutions/heatmap.py +++ b/ultralytics/solutions/heatmap.py @@ -104,12 +104,12 @@ class Heatmap(ObjectCounter): self.annotator.draw_region(reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2) self.store_tracking_history(track_id, box) # Store track history self.store_classwise_counts(cls) # store classwise counts in dict - + current_centroid = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2) # Store tracking previous position and perform object counting prev_position = None if len(self.track_history[track_id]) > 1: prev_position = self.track_history[track_id][-2] - self.count_objects(self.track_line, box, track_id, prev_position, cls) # Perform object counting + self.count_objects(current_centroid, track_id, prev_position, cls) # Perform object counting if self.region is not None: self.display_counts(im0) # Display the counts on the frame diff --git a/ultralytics/solutions/object_counter.py b/ultralytics/solutions/object_counter.py index 637492073..d52acda7f 100644 --- a/ultralytics/solutions/object_counter.py +++ b/ultralytics/solutions/object_counter.py @@ -46,13 +46,12 @@ class ObjectCounter(BaseSolution): self.show_in = self.CFG["show_in"] self.show_out = self.CFG["show_out"] - def 
count_objects(self, track_line, box, track_id, prev_position, cls): + def count_objects(self, current_centroid, track_id, prev_position, cls): """ Counts objects within a polygonal or linear region based on their tracks. Args: - track_line (Dict): Last 30 frame track record for the object. - box (List[float]): Bounding box coordinates [x1, y1, x2, y2] for the specific track in the current frame. + current_centroid (Tuple[float, float]): Current centroid values in the current frame. track_id (int): Unique identifier for the tracked object. prev_position (Tuple[float, float]): Last frame position coordinates (x, y) of the track. cls (int): Class index for classwise count updates. @@ -64,34 +63,51 @@ class ObjectCounter(BaseSolution): >>> track_id = 1 >>> prev_position = (120, 220) >>> cls = 0 - >>> counter.count_objects(track_line, box, track_id, prev_position, cls) + >>> counter.count_objects(current_centroid, track_id, prev_position, cls) """ if prev_position is None or track_id in self.counted_ids: return - centroid = self.r_s.centroid - dx = (box[0] - prev_position[0]) * (centroid.x - prev_position[0]) - dy = (box[1] - prev_position[1]) * (centroid.y - prev_position[1]) - - if len(self.region) >= 3 and self.r_s.contains(self.Point(track_line[-1])): - self.counted_ids.append(track_id) - # For polygon region - if dx > 0: - self.in_count += 1 - self.classwise_counts[self.names[cls]]["IN"] += 1 - else: - self.out_count += 1 - self.classwise_counts[self.names[cls]]["OUT"] += 1 - - elif len(self.region) < 3 and self.LineString([prev_position, box[:2]]).intersects(self.r_s): - self.counted_ids.append(track_id) - # For linear region - if dx > 0 and dy > 0: - self.in_count += 1 - self.classwise_counts[self.names[cls]]["IN"] += 1 - else: - self.out_count += 1 - self.classwise_counts[self.names[cls]]["OUT"] += 1 + if len(self.region) == 2: # Linear region (defined as a line segment) + line = self.LineString(self.region) # Check if the line intersects the trajectory of the object + if line.intersects(self.LineString([prev_position, current_centroid])): + # Determine orientation of the region (vertical or horizontal) + if abs(self.region[0][0] - self.region[1][0]) < abs(self.region[0][1] - self.region[1][1]): + # Vertical region: Compare x-coordinates to determine direction + if current_centroid[0] > prev_position[0]: # Moving right + self.in_count += 1 + self.classwise_counts[self.names[cls]]["IN"] += 1 + else: # Moving left + self.out_count += 1 + self.classwise_counts[self.names[cls]]["OUT"] += 1 + # Horizontal region: Compare y-coordinates to determine direction + elif current_centroid[1] > prev_position[1]: # Moving downward + self.in_count += 1 + self.classwise_counts[self.names[cls]]["IN"] += 1 + else: # Moving upward + self.out_count += 1 + self.classwise_counts[self.names[cls]]["OUT"] += 1 + self.counted_ids.append(track_id) + + elif len(self.region) > 2: # Polygonal region + polygon = self.Polygon(self.region) + if polygon.contains(self.Point(current_centroid)): + # Determine motion direction for vertical or horizontal polygons + region_width = max(p[0] for p in self.region) - min(p[0] for p in self.region) + region_height = max(p[1] for p in self.region) - min(p[1] for p in self.region) + + if ( + region_width < region_height + and current_centroid[0] > prev_position[0] + or region_width >= region_height + and current_centroid[1] > prev_position[1] + ): # Moving right + self.in_count += 1 + self.classwise_counts[self.names[cls]]["IN"] += 1 + else: # Moving left + self.out_count += 1 + 
self.classwise_counts[self.names[cls]]["OUT"] += 1 + self.counted_ids.append(track_id) def store_classwise_counts(self, cls): """ @@ -174,12 +190,12 @@ class ObjectCounter(BaseSolution): self.annotator.draw_centroid_and_tracks( self.track_line, color=colors(int(cls), True), track_thickness=self.line_width ) - + current_centroid = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2) # store previous position of track for object counting prev_position = None if len(self.track_history[track_id]) > 1: prev_position = self.track_history[track_id][-2] - self.count_objects(self.track_line, box, track_id, prev_position, cls) # Perform object counting + self.count_objects(current_centroid, track_id, prev_position, cls) # Perform object counting self.display_counts(im0) # Display the counts on the frame self.display_output(im0) # display output with base class function diff --git a/ultralytics/solutions/parking_management.py b/ultralytics/solutions/parking_management.py index a62de9952..8b5d4922d 100644 --- a/ultralytics/solutions/parking_management.py +++ b/ultralytics/solutions/parking_management.py @@ -89,7 +89,7 @@ class ParkingPtsSelection: """Uploads and displays an image on the canvas, resizing it to fit within specified dimensions.""" from PIL import Image, ImageTk # scope because ImageTk requires tkinter package - self.image = Image.open(self.filedialog.askopenfilename(filetypes=[("Image Files", "*.png;*.jpg;*.jpeg")])) + self.image = Image.open(self.filedialog.askopenfilename(filetypes=[("Image Files", "*.png *.jpg *.jpeg")])) if not self.image: return diff --git a/ultralytics/solutions/region_counter.py b/ultralytics/solutions/region_counter.py new file mode 100644 index 000000000..03575100d --- /dev/null +++ b/ultralytics/solutions/region_counter.py @@ -0,0 +1,112 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license + +from ultralytics.solutions.solutions import BaseSolution +from ultralytics.utils.plotting import Annotator, colors + + +class RegionCounter(BaseSolution): + """ + A class designed for real-time counting of objects within user-defined regions in a video stream. + + This class inherits from `BaseSolution` and offers functionalities to define polygonal regions in a video + frame, track objects, and count those objects that pass through each defined region. This makes it useful + for applications that require counting in specified areas, such as monitoring zones or segmented sections. + + Attributes: + region_template (dict): A template for creating new counting regions with default attributes including + the name, polygon coordinates, and display colors. + counting_regions (list): A list storing all defined regions, where each entry is based on `region_template` + and includes specific region settings like name, coordinates, and color. + + Methods: + add_region: Adds a new counting region with specified attributes, such as the region's name, polygon points, + region color, and text color. + count: Processes video frames to count objects in each region, drawing regions and displaying counts + on the frame. Handles object detection, region definition, and containment checks. 
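+
+    Examples:
+        A minimal sketch; the model weights, region points, and frame path are illustrative only:
+        >>> import cv2
+        >>> counter = RegionCounter(model="yolo11n.pt", region={"Zone A": [(0, 0), (640, 0), (640, 360), (0, 360)]})
+        >>> frame = cv2.imread("frame.jpg")
+        >>> annotated_frame = counter.count(frame)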
+ """ + + def __init__(self, **kwargs): + """Initializes the RegionCounter class for real-time counting in different regions of the video streams.""" + super().__init__(**kwargs) + self.region_template = { + "name": "Default Region", + "polygon": None, + "counts": 0, + "dragging": False, + "region_color": (255, 255, 255), + "text_color": (0, 0, 0), + } + self.counting_regions = [] + + def add_region(self, name, polygon_points, region_color, text_color): + """ + Adds a new region to the counting list based on the provided template with specific attributes. + + Args: + name (str): Name assigned to the new region. + polygon_points (list[tuple]): List of (x, y) coordinates defining the region's polygon. + region_color (tuple): BGR color for region visualization. + text_color (tuple): BGR color for the text within the region. + """ + region = self.region_template.copy() + region.update( + { + "name": name, + "polygon": self.Polygon(polygon_points), + "region_color": region_color, + "text_color": text_color, + } + ) + self.counting_regions.append(region) + + def count(self, im0): + """ + Processes the input frame to detect and count objects within each defined region. + + Args: + im0 (numpy.ndarray): Input image frame where objects and regions are annotated. + + Returns: + im0 (numpy.ndarray): Processed image frame with annotated counting information. + """ + self.annotator = Annotator(im0, line_width=self.line_width) + self.extract_tracks(im0) + + # Region initialization and conversion + if self.region is None: + self.initialize_region() + regions = {"Region#01": self.region} + else: + regions = self.region if isinstance(self.region, dict) else {"Region#01": self.region} + + # Draw regions and process counts for each defined area + for idx, (region_name, reg_pts) in enumerate(regions.items(), start=1): + color = colors(idx, True) + self.annotator.draw_region(reg_pts=reg_pts, color=color, thickness=self.line_width * 2) + self.add_region(region_name, reg_pts, color, self.annotator.get_txt_color()) + + # Prepare regions for containment check + for region in self.counting_regions: + region["prepared_polygon"] = self.prep(region["polygon"]) + + # Process bounding boxes and count objects within each region + for box, cls in zip(self.boxes, self.clss): + self.annotator.box_label(box, label=self.names[cls], color=colors(cls, True)) + bbox_center = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2) + + for region in self.counting_regions: + if region["prepared_polygon"].contains(self.Point(bbox_center)): + region["counts"] += 1 + + # Display counts in each region + for region in self.counting_regions: + self.annotator.text_label( + region["polygon"].bounds, + label=str(region["counts"]), + color=region["region_color"], + txt_color=region["text_color"], + ) + region["counts"] = 0 # Reset count for next frame + + self.display_output(im0) + return im0 diff --git a/ultralytics/solutions/solutions.py b/ultralytics/solutions/solutions.py index 20c2ce90b..fc05d42d6 100644 --- a/ultralytics/solutions/solutions.py +++ b/ultralytics/solutions/solutions.py @@ -50,10 +50,12 @@ class BaseSolution: """ check_requirements("shapely>=2.0.0") from shapely.geometry import LineString, Point, Polygon + from shapely.prepared import prep self.LineString = LineString self.Polygon = Polygon self.Point = Point + self.prep = prep # Load config and update with args DEFAULT_SOL_DICT.update(kwargs) @@ -72,14 +74,13 @@ class BaseSolution: self.model = YOLO(self.CFG["model"]) self.names = self.model.names - if IS_CLI: # for CLI, download 
the source and init video writer - if self.CFG["source"] is None: - d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4" - LOGGER.warning(f"⚠️ WARNING: source not provided. using default source {ASSETS_URL}/{d_s}") - from ultralytics.utils.downloads import safe_download + if IS_CLI and self.CFG["source"] is None: + d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4" + LOGGER.warning(f"⚠️ WARNING: source not provided. using default source {ASSETS_URL}/{d_s}") + from ultralytics.utils.downloads import safe_download - safe_download(f"{ASSETS_URL}/{d_s}") # download source from ultralytics assets - self.CFG["source"] = d_s # set default source + safe_download(f"{ASSETS_URL}/{d_s}") # download source from ultralytics assets + self.CFG["source"] = d_s # set default source # Initialize environment and region setup self.env_check = check_imshow(warn=True) @@ -134,7 +135,7 @@ class BaseSolution: def initialize_region(self): """Initialize the counting region and line segment based on configuration settings.""" if self.region is None: - self.region = [(20, 400), (1080, 404), (1080, 360), (20, 360)] + self.region = [(20, 400), (1080, 400), (1080, 360), (20, 360)] self.r_s = ( self.Polygon(self.region) if len(self.region) >= 3 else self.LineString(self.region) ) # region or line diff --git a/ultralytics/trackers/basetrack.py b/ultralytics/trackers/basetrack.py index f3baaf4e4..c78ee3595 100644 --- a/ultralytics/trackers/basetrack.py +++ b/ultralytics/trackers/basetrack.py @@ -44,7 +44,7 @@ class BaseTrack: start_frame (int): The frame number where tracking started. frame_id (int): The most recent frame ID processed by the track. time_since_update (int): Frames passed since the last update. - location (Tuple): The location of the object in the context of multi-camera tracking. + location (tuple): The location of the object in the context of multi-camera tracking. Methods: end_frame: Returns the ID of the last frame where the object was tracked. diff --git a/ultralytics/trackers/utils/matching.py b/ultralytics/trackers/utils/matching.py index f969f1126..b062d938e 100644 --- a/ultralytics/trackers/utils/matching.py +++ b/ultralytics/trackers/utils/matching.py @@ -27,10 +27,9 @@ def linear_assignment(cost_matrix: np.ndarray, thresh: float, use_lap: bool = Tr use_lap (bool): Use lap.lapjv for the assignment. If False, scipy.optimize.linear_sum_assignment is used. Returns: - (tuple): A tuple containing: - - matched_indices (np.ndarray): Array of matched indices of shape (K, 2), where K is the number of matches. - - unmatched_a (np.ndarray): Array of unmatched indices from the first set, with shape (L,). - - unmatched_b (np.ndarray): Array of unmatched indices from the second set, with shape (M,). + matched_indices (np.ndarray): Array of matched indices of shape (K, 2), where K is the number of matches. + unmatched_a (np.ndarray): Array of unmatched indices from the first set, with shape (L,). + unmatched_b (np.ndarray): Array of unmatched indices from the second set, with shape (M,). 
Examples: >>> cost_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py index a2540c6b8..f02e148bf 100644 --- a/ultralytics/utils/__init__.py +++ b/ultralytics/utils/__init__.py @@ -607,13 +607,12 @@ def is_raspberrypi() -> bool: def is_jetson() -> bool: """ - Determines if the Python environment is running on a Jetson Nano or Jetson Orin device by checking the device model - information. + Determines if the Python environment is running on an NVIDIA Jetson device by checking the device model information. Returns: - (bool): True if running on a Jetson Nano or Jetson Orin, False otherwise. + (bool): True if running on an NVIDIA Jetson device, False otherwise. """ - return "NVIDIA" in PROC_DEVICE_MODEL # i.e. "NVIDIA Jetson Nano" or "NVIDIA Orin NX" + return any(keyword in PROC_DEVICE_MODEL.lower() for keyword in ("nvidia", "jetson")) def is_online() -> bool: @@ -1255,9 +1254,12 @@ class SettingsManager(JSONDict): self.update(self.defaults) -def deprecation_warn(arg, new_arg): +def deprecation_warn(arg, new_arg=None): """Issue a deprecation warning when a deprecated argument is used, suggesting an updated argument.""" - LOGGER.warning(f"WARNING ⚠️ '{arg}' is deprecated and will be removed in in the future. Use '{new_arg}' instead.") + msg = f"WARNING ⚠️ '{arg}' is deprecated and will be removed in in the future." + if new_arg is not None: + msg += f" Use '{new_arg}' instead." + LOGGER.warning(msg) def clean_url(url): diff --git a/ultralytics/utils/autobatch.py b/ultralytics/utils/autobatch.py index 6a0d9cbc2..0c3e8e4bd 100644 --- a/ultralytics/utils/autobatch.py +++ b/ultralytics/utils/autobatch.py @@ -11,7 +11,7 @@ from ultralytics.utils import DEFAULT_CFG, LOGGER, colorstr from ultralytics.utils.torch_utils import autocast, profile -def check_train_batch_size(model, imgsz=640, amp=True, batch=-1): +def check_train_batch_size(model, imgsz=640, amp=True, batch=-1, max_num_obj=1): """ Compute optimal YOLO training batch size using the autobatch() function. @@ -20,6 +20,7 @@ def check_train_batch_size(model, imgsz=640, amp=True, batch=-1): imgsz (int, optional): Image size used for training. amp (bool, optional): Use automatic mixed precision if True. batch (float, optional): Fraction of GPU memory to use. If -1, use default. + max_num_obj (int, optional): The maximum number of objects from dataset. Returns: (int): Optimal batch size computed using the autobatch() function. @@ -29,10 +30,12 @@ def check_train_batch_size(model, imgsz=640, amp=True, batch=-1): Otherwise, a default fraction of 0.6 is used. """ with autocast(enabled=amp): - return autobatch(deepcopy(model).train(), imgsz, fraction=batch if 0.0 < batch < 1.0 else 0.6) + return autobatch( + deepcopy(model).train(), imgsz, fraction=batch if 0.0 < batch < 1.0 else 0.6, max_num_obj=max_num_obj + ) -def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch): +def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch, max_num_obj=1): """ Automatically estimate the best YOLO batch size to use a fraction of the available CUDA memory. @@ -41,6 +44,7 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch): imgsz (int, optional): The image size used as input for the YOLO model. Defaults to 640. fraction (float, optional): The fraction of available CUDA memory to use. Defaults to 0.60. batch_size (int, optional): The default batch size to use if an error is detected. Defaults to 16. 
+ max_num_obj (int, optional): The maximum number of objects from dataset. Returns: (int): The optimal batch size. @@ -70,7 +74,7 @@ def autobatch(model, imgsz=640, fraction=0.60, batch_size=DEFAULT_CFG.batch): batch_sizes = [1, 2, 4, 8, 16] if t < 16 else [1, 2, 4, 8, 16, 32, 64] try: img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes] - results = profile(img, model, n=1, device=device) + results = profile(img, model, n=1, device=device, max_num_obj=max_num_obj) # Fit a solution y = [x[2] for x in results if x] # memory [2] diff --git a/ultralytics/utils/benchmarks.py b/ultralytics/utils/benchmarks.py index 13d940780..e65d12887 100644 --- a/ultralytics/utils/benchmarks.py +++ b/ultralytics/utils/benchmarks.py @@ -114,10 +114,13 @@ def benchmark( assert LINUX or MACOS, "Windows Paddle exports not supported yet" if i == 12: # MNN assert not isinstance(model, YOLOWorld), "YOLOWorldv2 MNN exports not supported yet" - assert not IS_RASPBERRYPI, "MNN export not supported on Raspberry Pi" - assert not IS_JETSON, "MNN export not supported on NVIDIA Jetson" if i == 13: # NCNN assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet" + if i == 14: # IMX + assert not is_end2end + assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported" + assert model.task == "detect", "IMX only supported for detection task" + assert "C2f" in model.__str__(), "IMX only supported for YOLOv8" if "cpu" in device.type: assert cpu, "inference not supported on CPU" if "cuda" in device.type: diff --git a/ultralytics/utils/callbacks/comet.py b/ultralytics/utils/callbacks/comet.py index 3fae97f91..ef6e88c3f 100644 --- a/ultralytics/utils/callbacks/comet.py +++ b/ultralytics/utils/callbacks/comet.py @@ -291,7 +291,7 @@ def _log_plots(experiment, trainer): for plots in EVALUATION_PLOT_NAMES for prefix in POSE_METRICS_PLOT_PREFIX ] - elif isinstance(trainer.validator.metrics, DetMetrics) or isinstance(trainer.validator.metrics, OBBMetrics): + elif isinstance(trainer.validator.metrics, (DetMetrics, OBBMetrics)): plot_filenames = [trainer.save_dir / f"{plots}.png" for plots in EVALUATION_PLOT_NAMES] if plot_filenames is not None: diff --git a/ultralytics/utils/callbacks/raytune.py b/ultralytics/utils/callbacks/raytune.py index 1a368db66..d92dc3221 100644 --- a/ultralytics/utils/callbacks/raytune.py +++ b/ultralytics/utils/callbacks/raytune.py @@ -16,8 +16,7 @@ def on_fit_epoch_end(trainer): """Sends training metrics to Ray Tune at end of each epoch.""" if ray.train._internal.session._get_session(): # replacement for deprecated ray.tune.is_session_enabled() metrics = trainer.metrics - metrics["epoch"] = trainer.epoch - session.report(metrics) + session.report({**metrics, **{"epoch": trainer.epoch + 1}}) callbacks = ( diff --git a/ultralytics/utils/callbacks/wb.py b/ultralytics/utils/callbacks/wb.py index b82b8d85e..0898a2933 100644 --- a/ultralytics/utils/callbacks/wb.py +++ b/ultralytics/utils/callbacks/wb.py @@ -109,7 +109,12 @@ def _log_plots(plots, step): def on_pretrain_routine_start(trainer): """Initiate and start project if module is present.""" - wb.run or wb.init(project=trainer.args.project or "Ultralytics", name=trainer.args.name, config=vars(trainer.args)) + if not wb.run: + wb.init( + project=str(trainer.args.project).replace("/", "-") if trainer.args.project else "Ultralytics", + name=str(trainer.args.name).replace("/", "-"), + config=vars(trainer.args), + ) def on_fit_epoch_end(trainer): @@ -138,7 +143,7 @@ def on_train_end(trainer): 
art.add_file(trainer.best) wb.run.log_artifact(art, aliases=["best"]) # Check if we actually have plots to save - if trainer.args.plots: + if trainer.args.plots and hasattr(trainer.validator.metrics, "curves_results"): for curve_name, curve_values in zip(trainer.validator.metrics.curves, trainer.validator.metrics.curves_results): x, y, x_title, y_title = curve_values _plot_curve( diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py index 94038aefe..739528689 100644 --- a/ultralytics/utils/loss.py +++ b/ultralytics/utils/loss.py @@ -552,9 +552,8 @@ class v8PoseLoss(v8DetectionLoss): pred_kpts (torch.Tensor): Predicted keypoints, shape (BS, N_anchors, N_kpts_per_object, kpts_dim). Returns: - (tuple): Returns a tuple containing: - - kpts_loss (torch.Tensor): The keypoints loss. - - kpts_obj_loss (torch.Tensor): The keypoints object loss. + kpts_loss (torch.Tensor): The keypoints loss. + kpts_obj_loss (torch.Tensor): The keypoints object loss. """ batch_idx = batch_idx.flatten() batch_size = len(masks) diff --git a/ultralytics/utils/metrics.py b/ultralytics/utils/metrics.py index 2b80c02fe..bb521f5c3 100644 --- a/ultralytics/utils/metrics.py +++ b/ultralytics/utils/metrics.py @@ -549,19 +549,18 @@ def ap_per_class( prefix (str, optional): A prefix string for saving the plot files. Defaults to an empty string. Returns: - (tuple): A tuple of six arrays and one array of unique classes, where: - tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class.Shape: (nc,). - fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,). - p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,). - r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,). - f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,). - ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10). - unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,). - p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000). - r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000). - f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000). - x (np.ndarray): X-axis values for the curves. Shape: (1000,). - prec_values: Precision values at mAP@0.5 for each class. Shape: (nc, 1000). + tp (np.ndarray): True positive counts at threshold given by max F1 metric for each class.Shape: (nc,). + fp (np.ndarray): False positive counts at threshold given by max F1 metric for each class. Shape: (nc,). + p (np.ndarray): Precision values at threshold given by max F1 metric for each class. Shape: (nc,). + r (np.ndarray): Recall values at threshold given by max F1 metric for each class. Shape: (nc,). + f1 (np.ndarray): F1-score values at threshold given by max F1 metric for each class. Shape: (nc,). + ap (np.ndarray): Average precision for each class at different IoU thresholds. Shape: (nc, 10). + unique_classes (np.ndarray): An array of unique classes that have data. Shape: (nc,). + p_curve (np.ndarray): Precision curves for each class. Shape: (nc, 1000). + r_curve (np.ndarray): Recall curves for each class. Shape: (nc, 1000). + f1_curve (np.ndarray): F1-score curves for each class. Shape: (nc, 1000). + x (np.ndarray): X-axis values for the curves. Shape: (1000,). + prec_values (np.ndarray): Precision values at mAP@0.5 for each class. 
Shape: (nc, 1000). """ # Sort by objectness i = np.argsort(-conf) diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py index b76168f95..25e83c61c 100644 --- a/ultralytics/utils/ops.py +++ b/ultralytics/utils/ops.py @@ -317,11 +317,11 @@ def clip_boxes(boxes, shape): Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape. Args: - boxes (torch.Tensor): the bounding boxes to clip - shape (tuple): the shape of the image + boxes (torch.Tensor): The bounding boxes to clip. + shape (tuple): The shape of the image. Returns: - (torch.Tensor | numpy.ndarray): Clipped boxes + (torch.Tensor | numpy.ndarray): The clipped boxes. """ if isinstance(boxes, torch.Tensor): # faster individually (WARNING: inplace .clamp_() Apple MPS bug) boxes[..., 0] = boxes[..., 0].clamp(0, shape[1]) # x1 @@ -359,9 +359,9 @@ def scale_image(masks, im0_shape, ratio_pad=None): Takes a mask, and resizes it to the original image size. Args: - masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3]. - im0_shape (tuple): the original image shape - ratio_pad (tuple): the ratio of the padding to the original image. + masks (np.ndarray): Resized and padded masks/images, [h, w, num]/[h, w, 3]. + im0_shape (tuple): The original image shape. + ratio_pad (tuple): The ratio of the padding to the original image. Returns: masks (np.ndarray): The masks that are being returned with shape [h, w, num]. @@ -692,12 +692,12 @@ def process_mask_native(protos, masks_in, bboxes, shape): Args: protos (torch.Tensor): [mask_dim, mask_h, mask_w] - masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms - bboxes (torch.Tensor): [n, 4], n is number of masks after nms - shape (tuple): the size of the input image (h,w) + masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms. + bboxes (torch.Tensor): [n, 4], n is number of masks after nms. + shape (tuple): The size of the input image (h,w). Returns: - masks (torch.Tensor): The returned masks with dimensions [h, w, n] + masks (torch.Tensor): The returned masks with dimensions [h, w, n]. """ c, mh, mw = protos.shape # CHW masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw) @@ -783,23 +783,29 @@ def regularize_rboxes(rboxes): return torch.stack([x, y, w_, h_, t], dim=-1) # regularized boxes -def masks2segments(masks, strategy="largest"): +def masks2segments(masks, strategy="all"): """ It takes a list of masks(n,h,w) and returns a list of segments(n,xy). Args: masks (torch.Tensor): the output of the model, which is a tensor of shape (batch_size, 160, 160) - strategy (str): 'concat' or 'largest'. Defaults to largest + strategy (str): 'all' or 'largest'. 
Defaults to all Returns: segments (List): list of segment masks """ + from ultralytics.data.converter import merge_multi_segment + segments = [] for x in masks.int().cpu().numpy().astype("uint8"): c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] if c: - if strategy == "concat": # concatenate all segments - c = np.concatenate([x.reshape(-1, 2) for x in c]) + if strategy == "all": # merge and concatenate all segments + c = ( + np.concatenate(merge_multi_segment([x.reshape(-1, 2) for x in c])) + if len(c) > 1 + else c[0].reshape(-1, 2) + ) elif strategy == "largest": # select largest segment c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) else: diff --git a/ultralytics/utils/plotting.py b/ultralytics/utils/plotting.py index 6e257634d..f4514247c 100644 --- a/ultralytics/utils/plotting.py +++ b/ultralytics/utils/plotting.py @@ -584,8 +584,8 @@ class Annotator: Displays queue counts on an image centered at the points with customizable font size and colors. Args: - label (str): queue counts label - points (tuple): region points for center point calculation to display text + label (str): Queue counts label. + points (tuple): Region points for center point calculation to display text. region_color (tuple): RGB queue region color. txt_color (tuple): RGB text display color. """ @@ -624,13 +624,13 @@ class Annotator: Display the bounding boxes labels in parking management app. Args: - im0 (ndarray): inference image - text (str): object/class name - txt_color (tuple): display color for text foreground - bg_color (tuple): display color for text background - x_center (float): x position center point for bounding box - y_center (float): y position center point for bounding box - margin (int): gap between text and rectangle for better display + im0 (ndarray): Inference image. + text (str): Object/class name. + txt_color (tuple): Display color for text foreground. + bg_color (tuple): Display color for text background. + x_center (float): The x position center point for bounding box. + y_center (float): The y position center point for bounding box. + margin (int): The gap between text and rectangle for better display. """ text_size = cv2.getTextSize(text, 0, fontScale=self.sf, thickness=self.tf)[0] text_x = x_center - text_size[0] // 2 @@ -648,11 +648,11 @@ class Annotator: Display the overall statistics for parking lots. Args: - im0 (ndarray): inference image - text (dict): labels dictionary - txt_color (tuple): display color for text foreground - bg_color (tuple): display color for text background - margin (int): gap between text and rectangle for better display + im0 (ndarray): Inference image. + text (dict): Labels dictionary. + txt_color (tuple): Display color for text foreground. + bg_color (tuple): Display color for text background. + margin (int): Gap between text and rectangle for better display. """ horizontal_gap = int(im0.shape[1] * 0.02) vertical_gap = int(im0.shape[0] * 0.01) diff --git a/ultralytics/utils/tal.py b/ultralytics/utils/tal.py index 74604eda2..eec2a3b2d 100644 --- a/ultralytics/utils/tal.py +++ b/ultralytics/utils/tal.py @@ -3,6 +3,7 @@ import torch import torch.nn as nn +from . 
import LOGGER from .checks import check_version from .metrics import bbox_iou, probiou from .ops import xywhr2xyxyxyxy @@ -58,17 +59,46 @@ class TaskAlignedAssigner(nn.Module): """ self.bs = pd_scores.shape[0] self.n_max_boxes = gt_bboxes.shape[1] + device = gt_bboxes.device if self.n_max_boxes == 0: - device = gt_bboxes.device return ( - torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), - torch.zeros_like(pd_bboxes).to(device), - torch.zeros_like(pd_scores).to(device), - torch.zeros_like(pd_scores[..., 0]).to(device), - torch.zeros_like(pd_scores[..., 0]).to(device), + torch.full_like(pd_scores[..., 0], self.bg_idx), + torch.zeros_like(pd_bboxes), + torch.zeros_like(pd_scores), + torch.zeros_like(pd_scores[..., 0]), + torch.zeros_like(pd_scores[..., 0]), ) + try: + return self._forward(pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt) + except torch.OutOfMemoryError: + # Move tensors to CPU, compute, then move back to original device + LOGGER.warning("WARNING: CUDA OutOfMemoryError in TaskAlignedAssigner, using CPU") + cpu_tensors = [t.cpu() for t in (pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt)] + result = self._forward(*cpu_tensors) + return tuple(t.to(device) for t in result) + + def _forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt): + """ + Compute the task-aligned assignment. Reference code is available at + https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py. + + Args: + pd_scores (Tensor): shape(bs, num_total_anchors, num_classes) + pd_bboxes (Tensor): shape(bs, num_total_anchors, 4) + anc_points (Tensor): shape(num_total_anchors, 2) + gt_labels (Tensor): shape(bs, n_max_boxes, 1) + gt_bboxes (Tensor): shape(bs, n_max_boxes, 4) + mask_gt (Tensor): shape(bs, n_max_boxes, 1) + + Returns: + target_labels (Tensor): shape(bs, num_total_anchors) + target_bboxes (Tensor): shape(bs, num_total_anchors, 4) + target_scores (Tensor): shape(bs, num_total_anchors, num_classes) + fg_mask (Tensor): shape(bs, num_total_anchors) + target_gt_idx (Tensor): shape(bs, num_total_anchors) + """ mask_pos, align_metric, overlaps = self.get_pos_mask( pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt ) @@ -306,7 +336,7 @@ def make_anchors(feats, strides, grid_cell_offset=0.5): assert feats is not None dtype, device = feats[0].dtype, feats[0].device for i, stride in enumerate(strides): - _, _, h, w = feats[i].shape + h, w = feats[i].shape[2:] if isinstance(feats, list) else (int(feats[i][0]), int(feats[i][1])) sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y sy, sx = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_10 else torch.meshgrid(sy, sx) diff --git a/ultralytics/utils/torch_utils.py b/ultralytics/utils/torch_utils.py index 0dbc728e2..b413297be 100644 --- a/ultralytics/utils/torch_utils.py +++ b/ultralytics/utils/torch_utils.py @@ -623,7 +623,7 @@ def convert_optimizer_state_dict_to_fp16(state_dict): return state_dict -def profile(input, ops, n=10, device=None): +def profile(input, ops, n=10, device=None, max_num_obj=0): """ Ultralytics speed, memory and FLOPs profiler. 
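The TaskAlignedAssigner hunk above retries the assignment on the CPU when CUDA memory is exhausted: the inputs are copied to the CPU, `_forward` is re-run there, and the results are moved back to the original device. A minimal sketch of that fallback pattern, assuming a PyTorch build that exposes `torch.OutOfMemoryError` (as the hunk itself does) and using a hypothetical `compute` callable in place of `_forward`:

    import torch

    def run_with_cpu_fallback(compute, *tensors):
        """Run `compute` on the tensors' device; retry on the CPU if CUDA memory runs out."""
        device = tensors[0].device
        try:
            return compute(*tensors)
        except torch.OutOfMemoryError:
            # Copy inputs to the CPU, recompute, then move every output back to the original device
            cpu_tensors = [t.cpu() for t in tensors]
            result = compute(*cpu_tensors)
            return tuple(t.to(device) for t in result)

The happy path pays no extra cost; the device transfers only occur on the out-of-memory branch.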
@@ -671,6 +671,14 @@ def profile(input, ops, n=10, device=None): t[2] = float("nan") tf += (t[1] - t[0]) * 1000 / n # ms per op forward tb += (t[2] - t[1]) * 1000 / n # ms per op backward + if max_num_obj: # simulate training with predictions per image grid (for AutoBatch) + torch.randn( + x.shape[0], + max_num_obj, + int(sum((x.shape[-1] / s) * (x.shape[-2] / s) for s in m.stride.tolist())), + device=device, + dtype=torch.float32, + ) mem = torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0 # (GB) s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else "list" for x in (x, y)) # shapes p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters @@ -729,3 +737,48 @@ class EarlyStopping: f"i.e. `patience=300` or use `patience=0` to disable EarlyStopping." ) return stop + + +class FXModel(nn.Module): + """ + A custom model class for torch.fx compatibility. + + This class extends `torch.nn.Module` and is designed to ensure compatibility with torch.fx for tracing and graph manipulation. + It copies attributes from an existing model and explicitly sets the model attribute to ensure proper copying. + + Args: + model (torch.nn.Module): The original model to wrap for torch.fx compatibility. + """ + + def __init__(self, model): + """ + Initialize the FXModel. + + Args: + model (torch.nn.Module): The original model to wrap for torch.fx compatibility. + """ + super().__init__() + copy_attr(self, model) + # Explicitly set `model` since `copy_attr` somehow does not copy it. + self.model = model.model + + def forward(self, x): + """ + Forward pass through the model. + + This method performs the forward pass through the model, handling the dependencies between layers and saving intermediate outputs. + + Args: + x (torch.Tensor): The input tensor to the model. + + Returns: + (torch.Tensor): The output tensor from the model. + """ + y = [] # outputs + for m in self.model: + if m.f != -1: # if not from previous layer + # from earlier layers + x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] + x = m(x) # run + y.append(x) # save output + return x
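The new FXModel wrapper flattens an Ultralytics-style layer list, where each module records the index of the earlier output it consumes in `m.f`, into a plain Python loop that `torch.fx` can trace. A minimal sketch of that routing on a toy two-layer model (the `ToyLayer`/`ToyFXModel` names and shapes are illustrative, not the real YOLO graph):

    import torch
    import torch.nn as nn

    class ToyLayer(nn.Module):
        """Stand-in layer that stores `f`, the index of the earlier output it consumes (-1 = previous)."""

        def __init__(self, f=-1):
            super().__init__()
            self.f = f
            self.conv = nn.Conv2d(3, 3, 1)

        def forward(self, x):
            return self.conv(x)

    class ToyFXModel(nn.Module):
        """Applies the same output-routing loop as FXModel.forward above to a two-layer toy model."""

        def __init__(self):
            super().__init__()
            self.model = nn.ModuleList([ToyLayer(-1), ToyLayer(0)])  # second layer reads layer 0's output

        def forward(self, x):
            y = []  # saved intermediate outputs
            for m in self.model:
                if m.f != -1:  # route from an earlier layer instead of the previous one
                    x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]
                x = m(x)
                y.append(x)
            return x

    gm = torch.fx.symbolic_trace(ToyFXModel())  # traces cleanly because the routing uses only concrete ints
    print(gm.graph)

Because `m.f` is a plain integer attribute rather than a tensor, the control flow stays concrete during symbolic tracing, which is what makes this style of wrapper export-friendly.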