diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e0eff35cb8..9d7031e98e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -146,6 +146,8 @@ jobs: flags: Benchmarks env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + - name: Prune uv Cache + run: uv cache prune --ci - name: Benchmark Summary run: | cat benchmarks.log @@ -203,6 +205,8 @@ jobs: flags: Tests env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + - name: Prune uv Cache + run: uv cache prune --ci GPU: if: github.repository == 'ultralytics/ultralytics' && (github.event_name != 'workflow_dispatch' || github.event.inputs.gpu == 'true') diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 5b0c7a96d3..40b7efc186 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -23,13 +23,15 @@ on: inputs: publish_docs: description: "Publish live to https://docs.ultralytics.com" - default: "true" + default: true type: boolean jobs: Docs: if: github.repository == 'ultralytics/ultralytics' runs-on: ubuntu-latest + env: + GITHUB_REF: ${{ github.head_ref || github.ref }} steps: - name: Git config run: | @@ -40,7 +42,7 @@ jobs: with: repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }} token: ${{ secrets._GITHUB_TOKEN || secrets.GITHUB_TOKEN }} - ref: ${{ github.head_ref || github.ref }} + ref: ${{ env.GITHUB_REF }} fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v5 @@ -56,7 +58,7 @@ jobs: continue-on-error: true run: | python docs/build_reference.py - git pull origin ${{ github.head_ref || github.ref }} + git pull origin "$GITHUB_REF" git add . git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token if ! git diff --staged --quiet; then @@ -75,7 +77,7 @@ jobs: continue-on-error: true if: always() run: | - git pull origin ${{ github.head_ref || github.ref }} + git pull origin "$GITHUB_REF" git add --update # only add updated files git reset HEAD -- .github/workflows/ # workflow changes are not permitted with default token if ! 
git diff --staged --quiet; then diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 84f39b7ebd..c98b876265 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -9,7 +9,7 @@ on: types: [opened, edited] discussion: types: [created] - pull_request_target: + pull_request: branches: [main] types: [opened, closed, synchronize, review_requested] @@ -20,7 +20,7 @@ jobs: - name: Run Ultralytics Formatting uses: ultralytics/actions@main with: - token: ${{ secrets._GITHUB_TOKEN }} # note GITHUB_TOKEN automatically generated + token: ${{ secrets._GITHUB_TOKEN || secrets.GITHUB_TOKEN}} labels: true # autolabel issues and PRs python: true # format Python code and docstrings prettier: true # format YAML, JSON, Markdown and CSS diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index b1dd1e4350..ba736048d1 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -13,34 +13,23 @@ on: description: Publish to PyPI jobs: - publish: + check: if: github.repository == 'ultralytics/ultralytics' && github.actor == 'glenn-jocher' - name: Publish runs-on: ubuntu-latest - environment: # for GitHub Deployments tab - name: Release - PyPI - url: https://pypi.org/p/ultralytics permissions: - id-token: write # for PyPI trusted publishing + contents: write + outputs: + increment: ${{ steps.check_pypi.outputs.increment }} + current_tag: ${{ steps.check_pypi.outputs.current_tag }} + previous_tag: ${{ steps.check_pypi.outputs.previous_tag }} steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - token: ${{ secrets._GITHUB_TOKEN }} - - name: Git config - run: | - git config --global user.name "UltralyticsAssistant" - git config --global user.email "web@ultralytics.com" - - name: Set up Python environment - uses: actions/setup-python@v5 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: "3.x" - cache: "pip" - - name: Install dependencies - run: | - python -m pip install --upgrade pip wheel - pip install ultralytics-actions build twine toml - - name: Check PyPI version + - uses: astral-sh/setup-uv@v4 + - run: uv pip install --system --no-cache ultralytics-actions + - id: check_pypi shell: python run: | import os @@ -51,56 +40,82 @@ jobs: os.system(f'echo "previous_tag=v{online_version}" >> $GITHUB_OUTPUT') if publish: print('Ready to publish new version to PyPI โœ….') - id: check_pypi - - name: Build package - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' - run: python -m build - - name: Publish to PyPI - continue-on-error: true - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' - uses: pypa/gh-action-pypi-publish@release/v1 - - name: Publish new tag - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' - run: | - git tag -a "${{ steps.check_pypi.outputs.current_tag }}" -m "$(git log -1 --pretty=%B)" # i.e. 
"v0.1.2 commit message" - git push origin "${{ steps.check_pypi.outputs.current_tag }}" - - name: Publish new release - if: (github.event_name == 'push' || github.event.inputs.pypi == 'true') && steps.check_pypi.outputs.increment == 'True' + - name: Tag and Release + if: steps.check_pypi.outputs.increment == 'True' env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - GITHUB_TOKEN: ${{ secrets._GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} CURRENT_TAG: ${{ steps.check_pypi.outputs.current_tag }} PREVIOUS_TAG: ${{ steps.check_pypi.outputs.previous_tag }} - run: ultralytics-actions-summarize-release - shell: bash + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + git config --global user.name "UltralyticsAssistant" + git config --global user.email "web@ultralytics.com" + git tag -a "$CURRENT_TAG" -m "$(git log -1 --pretty=%B)" + git push origin "$CURRENT_TAG" + ultralytics-actions-summarize-release + uv cache prune --ci + + build: + needs: check + if: needs.check.outputs.increment == 'True' + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + - uses: astral-sh/setup-uv@v4 + - run: uv pip install --system --no-cache build + - run: python -m build + - uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ + - run: uv cache prune --ci + + publish: + needs: [check, build] + if: needs.check.outputs.increment == 'True' + runs-on: ubuntu-latest + environment: # for GitHub Deployments tab + name: Release - PyPI + url: https://pypi.org/p/ultralytics + permissions: + id-token: write # for PyPI trusted publishing + steps: + - uses: actions/download-artifact@v4 + with: + name: dist + path: dist/ + - uses: pypa/gh-action-pypi-publish@release/v1 + + notify: + needs: [check, publish] + if: always() && needs.check.outputs.increment == 'True' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 - name: Extract PR Details env: - GH_TOKEN: ${{ secrets._GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - # Check if the event is a pull request or pull_request_target - if [ "${{ github.event_name }}" = "pull_request" ] || [ "${{ github.event_name }}" = "pull_request_target" ]; then - PR_NUMBER=${{ github.event.pull_request.number }} - PR_TITLE=$(gh pr view $PR_NUMBER --json title --jq '.title') - else - # Use gh to find the PR associated with the commit - COMMIT_SHA=${{ github.event.after }} - PR_JSON=$(gh pr list --search "${COMMIT_SHA}" --state merged --json number,title --jq '.[0]') - PR_NUMBER=$(echo $PR_JSON | jq -r '.number') - PR_TITLE=$(echo $PR_JSON | jq -r '.title') - fi - echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV - echo "PR_TITLE=$PR_TITLE" >> $GITHUB_ENV - - - name: Notify on Slack (Success) - if: success() && github.event_name == 'push' && steps.check_pypi.outputs.increment == 'True' + PR_JSON=$(gh pr list --search "${GITHUB_SHA}" --state merged --json number,title --jq '.[0]') + PR_NUMBER=$(echo "${PR_JSON}" | jq -r '.number') + PR_TITLE=$(echo "${PR_JSON}" | jq -r '.title') + echo "PR_NUMBER=${PR_NUMBER}" >> "${GITHUB_ENV}" + echo "PR_TITLE=${PR_TITLE}" >> "${GITHUB_ENV}" + - name: Notify Success + if: needs.publish.result == 'success' && github.event_name == 'push' uses: slackapi/slack-github-action@v2.0.0 with: webhook-type: incoming-webhook webhook: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }} payload: | - text: " GitHub Actions success for ${{ github.workflow }} โœ…\n\n\n*Repository:* https://github.com/${{ github.repository 
}}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW `${{ github.repository }} ${{ steps.check_pypi.outputs.current_tag }}` pip package published ๐Ÿ˜ƒ\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n" - - name: Notify on Slack (Failure) - if: failure() + text: " GitHub Actions success for ${{ github.workflow }} โœ…\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW `${{ github.repository }} ${{ needs.check.outputs.current_tag }}` pip package published ๐Ÿ˜ƒ\n*Job Status:* ${{ job.status }}\n*Pull Request:* ${{ env.PR_TITLE }}\n" + - name: Notify Failure + if: needs.publish.result != 'success' uses: slackapi/slack-github-action@v2.0.0 with: webhook-type: incoming-webhook diff --git a/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md b/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md index 154ec7a893..cffeb22350 100644 --- a/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md +++ b/docs/en/guides/optimizing-openvino-latency-vs-throughput-modes.md @@ -61,7 +61,7 @@ OpenVINO's multi-device mode simplifies scaling throughput by automatically bala Optimizing Ultralytics YOLO models for latency and throughput with OpenVINO can significantly enhance your application's performance. By carefully applying the strategies outlined in this guide, developers can ensure their models run efficiently, meeting the demands of various deployment scenarios. Remember, the choice between optimizing for latency or throughput depends on your specific application needs and the characteristics of the deployment environment. -For more detailed technical information and the latest updates, refer to the [OpenVINO documentation](https://docs.openvino.ai/latest/index.html) and [Ultralytics YOLO repository](https://github.com/ultralytics/ultralytics). These resources provide in-depth guides, tutorials, and community support to help you get the most out of your deep learning models. +For more detailed technical information and the latest updates, refer to the [OpenVINO documentation](https://docs.openvino.ai/2024/index.html) and [Ultralytics YOLO repository](https://github.com/ultralytics/ultralytics). These resources provide in-depth guides, tutorials, and community support to help you get the most out of your deep learning models. --- diff --git a/docs/en/guides/raspberry-pi.md b/docs/en/guides/raspberry-pi.md index ef41d0f8cf..5e173db25b 100644 --- a/docs/en/guides/raspberry-pi.md +++ b/docs/en/guides/raspberry-pi.md @@ -2,6 +2,7 @@ comments: true description: Learn how to deploy Ultralytics YOLO11 on Raspberry Pi with our comprehensive guide. Get performance benchmarks, setup instructions, and best practices. keywords: Ultralytics, YOLO11, Raspberry Pi, setup, guide, benchmarks, computer vision, object detection, NCNN, Docker, camera modules +benchmark_version: 8.3.39 --- # Quick Start Guide: Raspberry Pi with Ultralytics YOLO11 @@ -144,7 +145,7 @@ We have only included benchmarks for YOLO11n and YOLO11s models because other mo
    YOLO11 benchmarks on RPi 5
-    Benchmarked with Ultralytics v8.3.39
+    Benchmarked with Ultralytics {{ benchmark_version }}
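Benchmark tables like the detailed comparison below are generated with the Ultralytics benchmarking utility; a minimal sketch, assuming a Raspberry Pi CPU target and the small COCO8 dataset (the guide's own "Reproduce Our Results" section gives the exact commands):

```python
from ultralytics.utils.benchmarks import benchmark

# Benchmark YOLO11n across the supported export formats on the Raspberry Pi CPU
benchmark(model="yolo11n.pt", data="coco8.yaml", imgsz=640, half=False, device="cpu")
```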
### Detailed Comparison Table

@@ -183,7 +184,7 @@ The below table represents the benchmark results for two different models (YOLO1
| MNN | ✅ | 36.2 | 0.7409 | 273.032 |
| NCNN | ✅ | 36.2 | 0.7419 | 194.858 |

-    Benchmarked with Ultralytics `v8.3.39`
+    Benchmarked with Ultralytics {{ benchmark_version }}

## Reproduce Our Results

diff --git a/docs/en/guides/triton-inference-server.md b/docs/en/guides/triton-inference-server.md
index 0151cc078d..67d419bf52 100644
--- a/docs/en/guides/triton-inference-server.md
+++ b/docs/en/guides/triton-inference-server.md
@@ -48,6 +48,16 @@ from ultralytics import YOLO
# Load a model
model = YOLO("yolo11n.pt")  # load an official model

+# Retrieve metadata during export
+metadata = []
+
+
+def export_cb(exporter):
+    metadata.append(exporter.metadata)
+
+
+model.add_callback("on_export_end", export_cb)
+
# Export the model
onnx_file = model.export(format="onnx", dynamic=True)
```
@@ -107,7 +117,13 @@ The Triton Model Repository is a storage location where Triton can access and lo
            }
          }
        }
-        """
+        parameters {
+          key: "metadata"
+          value: {
+            string_value: "%s"
+          }
+        }
+        """ % metadata[0]

    with open(triton_model_path / "config.pbtxt", "w") as f:
        f.write(data)

diff --git a/docs/en/macros/augmentation-args.md b/docs/en/macros/augmentation-args.md
index b4d6c9df6d..bee27ddd0d 100644
--- a/docs/en/macros/augmentation-args.md
+++ b/docs/en/macros/augmentation-args.md
@@ -13,7 +13,7 @@
| `bgr` | `float` | `0.0` | `0.0 - 1.0` | Flips the image channels from RGB to BGR with the specified probability, useful for increasing robustness to incorrect channel ordering. |
| `mosaic` | `float` | `1.0` | `0.0 - 1.0` | Combines four training images into one, simulating different scene compositions and object interactions. Highly effective for complex scene understanding. |
| `mixup` | `float` | `0.0` | `0.0 - 1.0` | Blends two images and their labels, creating a composite image. Enhances the model's ability to generalize by introducing label noise and visual variability. |
-| `copy_paste` | `float` | `0.0` | `0.0 - 1.0` | Copies objects from one image and pastes them onto another, useful for increasing object instances and learning object occlusion. |
+| `copy_paste` | `float` | `0.0` | `0.0 - 1.0` | Copies and pastes objects across images, useful for increasing object instances and learning object occlusion. Requires segmentation labels. |
| `copy_paste_mode` | `str` | `flip` | - | Copy-Paste augmentation method selection among the options of (`"flip"`, `"mixup"`). |
| `auto_augment` | `str` | `randaugment` | - | Automatically applies a predefined augmentation policy (`randaugment`, `autoaugment`, `augmix`), optimizing for classification tasks by diversifying the visual features. |
| `erasing` | `float` | `0.4` | `0.0 - 0.9` | Randomly erases a portion of the image during classification training, encouraging the model to focus on less obvious features for recognition. |

diff --git a/docs/en/macros/predict-args.md b/docs/en/macros/predict-args.md
index b40650d49a..f1960687f9 100644
--- a/docs/en/macros/predict-args.md
+++ b/docs/en/macros/predict-args.md
@@ -6,6 +6,7 @@
| `imgsz` | `int or tuple` | `640` | Defines the image size for inference. Can be a single integer `640` for square resizing or a (height, width) tuple. Proper sizing can improve detection [accuracy](https://www.ultralytics.com/glossary/accuracy) and processing speed. |
| `half` | `bool` | `False` | Enables half-[precision](https://www.ultralytics.com/glossary/precision) (FP16) inference, which can speed up model inference on supported GPUs with minimal impact on accuracy. |
| `device` | `str` | `None` | Specifies the device for inference (e.g., `cpu`, `cuda:0` or `0`). Allows users to select between CPU, a specific GPU, or other compute devices for model execution. |
+| `batch` | `int` | `1` | Specifies the batch size for inference (only works when the source is [a directory, video file or `.txt` file](/modes/predict.md/#inference-sources)). A larger batch size can provide higher throughput, shortening the total amount of time required for inference. |
| `max_det` | `int` | `300` | Maximum number of detections allowed per image. Limits the total number of objects the model can detect in a single inference, preventing excessive outputs in dense scenes. |
| `vid_stride` | `int` | `1` | Frame stride for video inputs. Allows skipping frames in videos to speed up processing at the cost of temporal resolution. A value of 1 processes every frame, higher values skip frames. |
| `stream_buffer` | `bool` | `False` | Determines whether to queue incoming frames for video streams. If `False`, old frames get dropped to accommodate new frames (optimized for real-time applications). If `True`, queues new frames in a buffer, ensuring no frames get skipped, but will cause latency if inference FPS is lower than stream FPS. |

diff --git a/docs/en/reference/utils/ops.md b/docs/en/reference/utils/ops.md
index b62ba7b4eb..ab6cafbca8 100644
--- a/docs/en/reference/utils/ops.md
+++ b/docs/en/reference/utils/ops.md
@@ -129,4 +129,8 @@ keywords: Ultralytics, utility operations, non-max suppression, bounding box tra
## ::: ultralytics.utils.ops.clean_str
+
+## ::: ultralytics.utils.ops.empty_like
+
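The new reference entry above documents the `empty_like` helper that this patch adds in `ultralytics/utils/ops.py` (see the hunk near the end of the diff); a minimal usage sketch of its behavior, with illustrative values:

```python
import numpy as np
import torch

from ultralytics.utils.ops import empty_like

boxes_np = np.array([[10, 20, 30, 40]], dtype=np.int32)
boxes_pt = torch.tensor([[10, 20, 30, 40]], dtype=torch.float16)

# Returns an uninitialized buffer with the same shape as the input, always float32,
# in the same framework as the input (NumPy in -> NumPy out, torch in -> torch out)
print(empty_like(boxes_np).shape, empty_like(boxes_np).dtype)  # (1, 4) float32
print(empty_like(boxes_pt).shape, empty_like(boxes_pt).dtype)  # torch.Size([1, 4]) torch.float32
```

The box-conversion functions (`xyxy2xywh`, `xywh2xyxy`, and friends) in this patch switch to this helper in place of the inline `torch.empty_like`/`np.empty_like` branch.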
diff --git a/docs/en/solutions/index.md b/docs/en/solutions/index.md index 243fbf2757..dd7b61d252 100644 --- a/docs/en/solutions/index.md +++ b/docs/en/solutions/index.md @@ -29,7 +29,6 @@ Here's our curated list of Ultralytics solutions that can be used to create awes - [Parking Management](../guides/parking-management.md) ๐Ÿš€: Organize and direct vehicle flow in parking areas with YOLO11, optimizing space utilization and user experience. - [Analytics](../guides/analytics.md) ๐Ÿ“Š: Conduct comprehensive data analysis to discover patterns and make informed decisions, leveraging YOLO11 for descriptive, predictive, and prescriptive analytics. - [Live Inference with Streamlit](../guides/streamlit-live-inference.md) ๐Ÿš€: Leverage the power of YOLO11 for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) directly through your web browser with a user-friendly Streamlit interface. -- [Live Inference with Streamlit](../guides/streamlit-live-inference.md) ๐Ÿš€: Leverage the power of YOLO11 for real-time [object detection](https://www.ultralytics.com/glossary/object-detection) directly through your web browser with a user-friendly Streamlit interface. - [Track Objects in Zone](../guides/trackzone.md) ๐ŸŽฏ NEW: Learn how to track objects within specific zones of video frames using YOLO11 for precise and efficient monitoring. ## Solutions Usage @@ -39,7 +38,7 @@ Here's our curated list of Ultralytics solutions that can be used to create awes `yolo SOLUTIONS SOLUTION_NAME ARGS` - **SOLUTIONS** is a required keyword. - - **SOLUTION_NAME** (optional) is one of: `['count', 'heatmap', 'queue', 'speed', 'workout', 'analytics']`. + - **SOLUTION_NAME** (optional) is one of: `['count', 'heatmap', 'queue', 'speed', 'workout', 'analytics', 'trackzone']`. - **ARGS** (optional) are custom `arg=value` pairs, such as `show_in=True`, to override default settings. === "CLI" diff --git a/docs/en/yolov5/tutorials/model_export.md b/docs/en/yolov5/tutorials/model_export.md index a3a945c1e1..5cee3fdde9 100644 --- a/docs/en/yolov5/tutorials/model_export.md +++ b/docs/en/yolov5/tutorials/model_export.md @@ -31,7 +31,7 @@ YOLOv5 inference is officially supported in 11 formats: | [PyTorch](https://pytorch.org/) | - | `yolov5s.pt` | | [TorchScript](https://pytorch.org/docs/stable/jit.html) | `torchscript` | `yolov5s.torchscript` | | [ONNX](https://onnx.ai/) | `onnx` | `yolov5s.onnx` | -| [OpenVINO](https://docs.openvino.ai/latest/index.html) | `openvino` | `yolov5s_openvino_model/` | +| [OpenVINO](https://docs.openvino.ai/2024/index.html) | `openvino` | `yolov5s_openvino_model/` | | [TensorRT](https://developer.nvidia.com/tensorrt) | `engine` | `yolov5s.engine` | | [CoreML](https://github.com/apple/coremltools) | `coreml` | `yolov5s.mlmodel` | | [TensorFlow SavedModel](https://www.tensorflow.org/guide/saved_model) | `saved_model` | `yolov5s_saved_model/` | diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb index 5eadcf3e60..457334a689 100644 --- a/examples/tutorial.ipynb +++ b/examples/tutorial.ipynb @@ -45,6 +45,25 @@ "" ] }, + { + "cell_type": "markdown", + "source": [ + "
\n",
    "Watch: How to Train Ultralytics YOLO11 Model on Custom Dataset using Google Colab Notebook 🚀\n",
    "
" + ], + "metadata": { + "id": "DXHD1DC5M64G" + } + }, { "cell_type": "markdown", "metadata": { diff --git a/mkdocs.yml b/mkdocs.yml index 283d52f3d9..e84bca25a5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -69,8 +69,9 @@ theme: - content.tabs.link # all code tabs change simultaneously # Customization -copyright: ยฉ 2024 Ultralytics Inc. All rights reserved. +copyright: ยฉ 2024 Ultralytics Inc. All rights reserved. extra: # version: + homepage: https://www.ultralytics.com/ # provider: mike # version drop-down menu robots: robots.txt analytics: @@ -90,7 +91,7 @@ extra: # version: - icon: fontawesome/brands/python link: https://pypi.org/project/ultralytics/ - icon: fontawesome/brands/discord - link: https://ultralytics.com/discord + link: https://discord.com/invite/ultralytics - icon: fontawesome/brands/reddit link: https://reddit.com/r/ultralytics diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 601d1bb363..d178a35d8f 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO ๐Ÿš€, AGPL-3.0 license -__version__ = "8.3.40" +__version__ = "8.3.48" import os diff --git a/ultralytics/cfg/__init__.py b/ultralytics/cfg/__init__.py index e4c239f3d4..b36418fbf5 100644 --- a/ultralytics/cfg/__init__.py +++ b/ultralytics/cfg/__init__.py @@ -77,7 +77,7 @@ SOLUTIONS_HELP_MSG = f""" yolo solutions SOLUTION ARGS - Where SOLUTION (optional) is one of {list(SOLUTION_MAP.keys())} + Where SOLUTION (optional) is one of {list(SOLUTION_MAP.keys())[:-1]} ARGS (optional) are any number of custom 'arg=value' pairs like 'show_in=True' that override defaults at https://docs.ultralytics.com/usage/cfg @@ -96,7 +96,7 @@ SOLUTIONS_HELP_MSG = f""" 5. Generate analytical graphs yolo solutions analytics analytics_type="pie" - 6. Track Objects Within Specific Zones + 6. Track objects within specific zones yolo solutions trackzone source="path/to/video/file.mp4" region=[(150, 150), (1130, 150), (1130, 570), (150, 570)] """ CLI_HELP_MSG = f""" @@ -125,7 +125,7 @@ CLI_HELP_MSG = f""" yolo streamlit-predict 6. Ultralytics solutions usage - yolo solutions count or in {list(SOLUTION_MAP.keys())} source="path/to/video/file.mp4" + yolo solutions count or in {list(SOLUTION_MAP.keys())[1:-1]} source="path/to/video/file.mp4" 7. 
Run special commands: yolo help diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py index c0e29e7e1c..ae84cab9a1 100644 --- a/ultralytics/engine/exporter.py +++ b/ultralytics/engine/exporter.py @@ -73,7 +73,7 @@ from ultralytics.data import build_dataloader from ultralytics.data.dataset import YOLODataset from ultralytics.data.utils import check_cls_dataset, check_det_dataset from ultralytics.nn.autobackend import check_class_names, default_class_names -from ultralytics.nn.modules import C2f, Detect, RTDETRDecoder +from ultralytics.nn.modules import C2f, Classify, Detect, RTDETRDecoder from ultralytics.nn.tasks import DetectionModel, SegmentationModel, WorldModel from ultralytics.utils import ( ARM64, @@ -287,6 +287,8 @@ class Exporter: model = FXModel(model) for m in model.modules(): + if isinstance(m, Classify): + m.export = True if isinstance(m, (Detect, RTDETRDecoder)): # includes all Detect subclasses like Segment, Pose, OBB m.dynamic = self.args.dynamic m.export = True diff --git a/ultralytics/engine/model.py b/ultralytics/engine/model.py index 874613d2f1..db8d87ebc2 100644 --- a/ultralytics/engine/model.py +++ b/ultralytics/engine/model.py @@ -136,6 +136,7 @@ class Model(nn.Module): # Check if Triton Server model elif self.is_triton_model(model): self.model_name = self.model = model + self.overrides["task"] = task or "detect" # set `task=detect` if not explicitly set return # Load or create new YOLO model diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py index c28e1895d0..c5250166e9 100644 --- a/ultralytics/engine/predictor.py +++ b/ultralytics/engine/predictor.py @@ -155,7 +155,7 @@ class BasePredictor: same_shapes = len({x.shape for x in im}) == 1 letterbox = LetterBox( self.imgsz, - auto=same_shapes and (self.model.pt or getattr(self.model, "dynamic", False)), + auto=same_shapes and (self.model.pt or (getattr(self.model, "dynamic", False) and not self.model.imx)), stride=self.model.stride, ) return [letterbox(image=x) for x in im] diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py index 540d1007a3..b657ef7051 100644 --- a/ultralytics/models/sam/predict.py +++ b/ultralytics/models/sam/predict.py @@ -1105,7 +1105,7 @@ class SAM2VideoPredictor(SAM2Predictor): for obj_temp_output_dict in temp_output_dict_per_obj.values(): temp_frame_inds.update(obj_temp_output_dict[storage_key].keys()) consolidated_frame_inds[storage_key].update(temp_frame_inds) - # consolidate the temprary output across all objects on this frame + # consolidate the temporary output across all objects on this frame for frame_idx in temp_frame_inds: consolidated_out = self._consolidate_temp_output_across_obj( frame_idx, is_cond=is_cond, run_mem_encoder=True diff --git a/ultralytics/models/yolo/classify/predict.py b/ultralytics/models/yolo/classify/predict.py index b75a194984..385f75bc0d 100644 --- a/ultralytics/models/yolo/classify/predict.py +++ b/ultralytics/models/yolo/classify/predict.py @@ -53,7 +53,8 @@ class ClassificationPredictor(BasePredictor): if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list orig_imgs = ops.convert_torch2numpy_batch(orig_imgs) + preds = preds[0] if isinstance(preds, (list, tuple)) else preds return [ - Results(orig_img, path=img_path, names=self.model.names, probs=pred.softmax(0)) + Results(orig_img, path=img_path, names=self.model.names, probs=pred) for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]) ] diff --git a/ultralytics/models/yolo/classify/val.py 
b/ultralytics/models/yolo/classify/val.py index e54f04118a..67333f2604 100644 --- a/ultralytics/models/yolo/classify/val.py +++ b/ultralytics/models/yolo/classify/val.py @@ -71,6 +71,10 @@ class ClassificationValidator(BaseValidator): self.metrics.confusion_matrix = self.confusion_matrix self.metrics.save_dir = self.save_dir + def postprocess(self, preds): + """Preprocesses the classification predictions.""" + return preds[0] if isinstance(preds, (list, tuple)) else preds + def get_stats(self): """Returns a dictionary of metrics obtained by processing targets and predictions.""" self.metrics.process(self.targets, self.pred) diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py index 60b9f6389a..b6df3753ec 100644 --- a/ultralytics/nn/autobackend.py +++ b/ultralytics/nn/autobackend.py @@ -96,7 +96,7 @@ class AutoBackend(nn.Module): Initialize the AutoBackend for inference. Args: - weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'. + weights (str | torch.nn.Module): Path to the model weights file or a module instance. Defaults to 'yolo11n.pt'. device (torch.device): Device to run the model on. Defaults to CPU. dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False. data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional. @@ -462,6 +462,7 @@ class AutoBackend(nn.Module): from ultralytics.utils.triton import TritonRemoteModel model = TritonRemoteModel(w) + metadata = model.metadata # Any other format (unsupported) else: @@ -700,8 +701,7 @@ class AutoBackend(nn.Module): # print(type(x), len(x)) if isinstance(x, (list, tuple)) else print(type(x), x.shape) # debug shapes if isinstance(y, (list, tuple)): if len(self.names) == 999 and (self.task == "segment" or len(y) == 2): # segments and names not defined - ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0) # index of protos, boxes - nc = y[ib].shape[1] - y[ip].shape[3] - 4 # y = (1, 160, 160, 32), (1, 116, 8400) + nc = y[0].shape[1] - y[1].shape[1] - 4 # y = (1, 32, 160, 160), (1, 116, 8400) self.names = {i: f"class{i}" for i in range(nc)} return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y] else: diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py index 25964ac2e5..0afb5fd16f 100644 --- a/ultralytics/nn/modules/head.py +++ b/ultralytics/nn/modules/head.py @@ -282,6 +282,8 @@ class Pose(Detect): class Classify(nn.Module): """YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2).""" + export = False # export mode + def __init__(self, c1, c2, k=1, s=1, p=None, g=1): """Initializes YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.""" super().__init__() @@ -296,7 +298,10 @@ class Classify(nn.Module): if isinstance(x, list): x = torch.cat(x, 1) x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1))) - return x + if self.training: + return x + y = x.softmax(1) # get final output + return y if self.export else (y, x) class WorldDetect(Detect): diff --git a/ultralytics/solutions/heatmap.py b/ultralytics/solutions/heatmap.py index c9dd808798..bf2903b7a9 100644 --- a/ultralytics/solutions/heatmap.py +++ b/ultralytics/solutions/heatmap.py @@ -27,12 +27,8 @@ class Heatmap(ObjectCounter): Examples: >>> from ultralytics.solutions import Heatmap >>> heatmap = Heatmap(model="yolov8n.pt", colormap=cv2.COLORMAP_JET) - >>> results = heatmap("path/to/video.mp4") - >>> for result in results: - ... print(result.speed) # Print inference speed - ... 
cv2.imshow("Heatmap", result.plot()) - ... if cv2.waitKey(1) & 0xFF == ord("q"): - ... break + >>> frame = cv2.imread("frame.jpg") + >>> processed_frame = heatmap.generate_heatmap(frame) """ def __init__(self, **kwargs): diff --git a/ultralytics/solutions/queue_management.py b/ultralytics/solutions/queue_management.py index ca0acb14f8..043bd371d5 100644 --- a/ultralytics/solutions/queue_management.py +++ b/ultralytics/solutions/queue_management.py @@ -27,10 +27,13 @@ class QueueManager(BaseSolution): display_output: Displays the processed output. Examples: - >>> queue_manager = QueueManager(source="video.mp4", region=[100, 100, 200, 200, 300, 300]) - >>> for frame in video_stream: - ... processed_frame = queue_manager.process_queue(frame) - ... cv2.imshow("Queue Management", processed_frame) + >>> cap = cv2.VideoCapture("Path/to/video/file.mp4") + >>> queue_manager = QueueManager(region=[100, 100, 200, 200, 300, 300]) + >>> while cap.isOpened(): + >>> success, im0 = cap.read() + >>> if not success: + >>> break + >>> out = queue.process_queue(im0) """ def __init__(self, **kwargs): diff --git a/ultralytics/trackers/utils/matching.py b/ultralytics/trackers/utils/matching.py index b062d938e5..4a3a420af4 100644 --- a/ultralytics/trackers/utils/matching.py +++ b/ultralytics/trackers/utils/matching.py @@ -13,7 +13,7 @@ try: except (ImportError, AssertionError, AttributeError): from ultralytics.utils.checks import check_requirements - check_requirements("lapx>=0.5.2") # update to lap package from https://github.com/rathaROG/lapx + check_requirements("lap>=0.5.12") # https://github.com/gatagat/lap import lap diff --git a/ultralytics/utils/checks.py b/ultralytics/utils/checks.py index 3a8201a54e..fe858eb023 100644 --- a/ultralytics/utils/checks.py +++ b/ultralytics/utils/checks.py @@ -669,8 +669,22 @@ def check_amp(model): from ultralytics.utils.torch_utils import autocast device = next(model.parameters()).device # get model device + prefix = colorstr("AMP: ") if device.type in {"cpu", "mps"}: return False # AMP only used on CUDA devices + else: + # GPUs that have issues with AMP + pattern = re.compile( + r"(nvidia|geforce|quadro|tesla).*?(1660|1650|1630|t400|t550|t600|t1000|t1200|t2000|k40m)", re.IGNORECASE + ) + + gpu = torch.cuda.get_device_name(device) + if bool(pattern.search(gpu)): + LOGGER.warning( + f"{prefix}checks failed โŒ. AMP training on {gpu} GPU may cause " + f"NaN losses or zero-mAP results, so AMP will be disabled during training." + ) + return False def amp_allclose(m, im): """All close FP32 vs AMP results.""" @@ -683,7 +697,6 @@ def check_amp(model): return a.shape == b.shape and torch.allclose(a, b.float(), atol=0.5) # close to 0.5 absolute tolerance im = ASSETS / "bus.jpg" # image to check - prefix = colorstr("AMP: ") LOGGER.info(f"{prefix}running Automatic Mixed Precision (AMP) checks...") warning_msg = "Setting 'amp=True'. If you experience zero-mAP or NaN losses you can disable AMP with amp=False." 
try: diff --git a/ultralytics/utils/loss.py b/ultralytics/utils/loss.py index 7395286899..c6557df4c0 100644 --- a/ultralytics/utils/loss.py +++ b/ultralytics/utils/loss.py @@ -604,6 +604,7 @@ class v8ClassificationLoss: def __call__(self, preds, batch): """Compute the classification loss between predictions and true labels.""" + preds = preds[1] if isinstance(preds, (list, tuple)) else preds loss = F.cross_entropy(preds, batch["cls"], reduction="mean") loss_items = loss.detach() return loss, loss_items diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py index ac53546ed1..9a05b3a8b5 100644 --- a/ultralytics/utils/ops.py +++ b/ultralytics/utils/ops.py @@ -400,7 +400,7 @@ def xyxy2xywh(x): y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format. """ assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}" - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy + y = empty_like(x) # faster than clone/copy y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center y[..., 2] = x[..., 2] - x[..., 0] # width @@ -420,7 +420,7 @@ def xywh2xyxy(x): y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format. """ assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}" - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy + y = empty_like(x) # faster than clone/copy xy = x[..., :2] # centers wh = x[..., 2:] / 2 # half width-height y[..., :2] = xy - wh # top left xy @@ -443,7 +443,7 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box. 
""" assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}" - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy + y = empty_like(x) # faster than clone/copy y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw # bottom right x @@ -469,7 +469,7 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): if clip: x = clip_boxes(x, (h - eps, w - eps)) assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}" - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy + y = empty_like(x) # faster than clone/copy y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w # x center y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h # y center y[..., 2] = (x[..., 2] - x[..., 0]) / w # width @@ -625,8 +625,9 @@ def resample_segments(segments, n=1000): """ for i, s in enumerate(segments): s = np.concatenate((s, s[0:1, :]), axis=0) - x = np.linspace(0, len(s) - 1, n) + x = np.linspace(0, len(s) - 1, n - len(s) if len(s) < n else n) xp = np.arange(len(s)) + x = np.insert(x, np.searchsorted(x, xp), xp) if len(s) < n else x segments[i] = ( np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)], dtype=np.float32).reshape(2, -1).T ) # segment xy @@ -837,3 +838,10 @@ def clean_str(s): (str): a string with special characters replaced by an underscore _ """ return re.sub(pattern="[|@#!ยกยท$โ‚ฌ%&()=?ยฟ^*;:,ยจยด><+]", repl="_", string=s) + + +def empty_like(x): + """Creates empty torch.Tensor or np.ndarray with same shape as input and float32 dtype.""" + return ( + torch.empty_like(x, dtype=torch.float32) if isinstance(x, torch.Tensor) else np.empty_like(x, dtype=np.float32) + ) diff --git a/ultralytics/utils/triton.py b/ultralytics/utils/triton.py index 3f873a6faf..cc53ed5714 100644 --- a/ultralytics/utils/triton.py +++ b/ultralytics/utils/triton.py @@ -66,6 +66,7 @@ class TritonRemoteModel: self.np_input_formats = [type_map[x] for x in self.input_formats] self.input_names = [x["name"] for x in config["input"]] self.output_names = [x["name"] for x in config["output"]] + self.metadata = eval(config.get("parameters", {}).get("metadata", {}).get("string_value", "None")) def __call__(self, *inputs: np.ndarray) -> List[np.ndarray]: """