diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index d798cbec18..359db69ddb 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -54,7 +54,7 @@ jobs: runs-on: ubuntu-latest strategy: fail-fast: false - max-parallel: 6 + max-parallel: 10 matrix: include: - dockerfile: "Dockerfile" @@ -82,6 +82,15 @@ jobs: # tags: "latest-conda" # platforms: "linux/amd64" steps: + - name: Cleanup toolcache + # Free up to 10GB of disk space per https://github.com/ultralytics/ultralytics/pull/14894 + run: | + echo "Free space before deletion:" + df -h / + rm -rf /opt/hostedtoolcache + echo "Free space after deletion:" + df -h / + - name: Checkout repo uses: actions/checkout@v4 with: @@ -133,7 +142,7 @@ jobs: uses: nick-invision/retry@v3 with: timeout_minutes: 120 - retry_wait_seconds: 30 + retry_wait_seconds: 60 max_attempts: 2 # retry once command: | docker build \ diff --git a/.gitignore b/.gitignore index 1b68ec6b2c..1c0c5fbea6 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,9 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST +requirements.txt +setup.py +ultralytics.egg-info # PyInstaller # Usually these files are written by a python script from a template diff --git a/docs/en/datasets/classify/caltech101.md b/docs/en/datasets/classify/caltech101.md index 451c4cc97e..6a75f66ac8 100644 --- a/docs/en/datasets/classify/caltech101.md +++ b/docs/en/datasets/classify/caltech101.md @@ -46,7 +46,7 @@ To train a YOLO model on the Caltech-101 dataset for 100 epochs, you can use the ```bash # Start training from a pretrained *.pt model - yolo detect train data=caltech101 model=yolov8n-cls.pt epochs=100 imgsz=416 + yolo classify train data=caltech101 model=yolov8n-cls.pt epochs=100 imgsz=416 ``` ## Sample Images and Annotations @@ -108,7 +108,7 @@ To train an Ultralytics YOLO model on the Caltech-101 dataset, you can use the p ```bash # Start training from a pretrained *.pt model - yolo detect train data=caltech101 
model=yolov8n-cls.pt epochs=100 imgsz=416 + yolo classify train data=caltech101 model=yolov8n-cls.pt epochs=100 imgsz=416 ``` For more detailed arguments and options, refer to the model [Training](../../modes/train.md) page. diff --git a/docs/en/datasets/classify/caltech256.md b/docs/en/datasets/classify/caltech256.md index 6c6cf4a65a..c7b367cc63 100644 --- a/docs/en/datasets/classify/caltech256.md +++ b/docs/en/datasets/classify/caltech256.md @@ -57,7 +57,7 @@ To train a YOLO model on the Caltech-256 dataset for 100 epochs, you can use the ```bash # Start training from a pretrained *.pt model - yolo detect train data=caltech256 model=yolov8n-cls.pt epochs=100 imgsz=416 + yolo classify train data=caltech256 model=yolov8n-cls.pt epochs=100 imgsz=416 ``` ## Sample Images and Annotations @@ -116,7 +116,7 @@ To train a YOLO model on the Caltech-256 dataset for 100 epochs, you can use the ```bash # Start training from a pretrained *.pt model - yolo detect train data=caltech256 model=yolov8n-cls.pt epochs=100 imgsz=416 + yolo classify train data=caltech256 model=yolov8n-cls.pt epochs=100 imgsz=416 ``` ### What are the most common use cases for the Caltech-256 dataset? 
diff --git a/docs/en/datasets/classify/cifar10.md b/docs/en/datasets/classify/cifar10.md index 513f838319..b4742cbcb7 100644 --- a/docs/en/datasets/classify/cifar10.md +++ b/docs/en/datasets/classify/cifar10.md @@ -60,7 +60,7 @@ To train a YOLO model on the CIFAR-10 dataset for 100 epochs with an image size ```bash # Start training from a pretrained *.pt model - yolo detect train data=cifar10 model=yolov8n-cls.pt epochs=100 imgsz=32 + yolo classify train data=cifar10 model=yolov8n-cls.pt epochs=100 imgsz=32 ``` ## Sample Images and Annotations @@ -114,7 +114,7 @@ To train a YOLO model on the CIFAR-10 dataset using Ultralytics, you can follow ```bash # Start training from a pretrained *.pt model - yolo detect train data=cifar10 model=yolov8n-cls.pt epochs=100 imgsz=32 + yolo classify train data=cifar10 model=yolov8n-cls.pt epochs=100 imgsz=32 ``` For more details, refer to the model [Training](../../modes/train.md) page. diff --git a/docs/en/datasets/classify/cifar100.md b/docs/en/datasets/classify/cifar100.md index 5110564ef0..4a8ba4bd8b 100644 --- a/docs/en/datasets/classify/cifar100.md +++ b/docs/en/datasets/classify/cifar100.md @@ -49,7 +49,7 @@ To train a YOLO model on the CIFAR-100 dataset for 100 epochs with an image size ```bash # Start training from a pretrained *.pt model - yolo detect train data=cifar100 model=yolov8n-cls.pt epochs=100 imgsz=32 + yolo classify train data=cifar100 model=yolov8n-cls.pt epochs=100 imgsz=32 ``` ## Sample Images and Annotations @@ -107,7 +107,7 @@ You can train a YOLO model on the CIFAR-100 dataset using either Python or CLI c ```bash # Start training from a pretrained *.pt model - yolo detect train data=cifar100 model=yolov8n-cls.pt epochs=100 imgsz=32 + yolo classify train data=cifar100 model=yolov8n-cls.pt epochs=100 imgsz=32 ``` For a comprehensive list of available arguments, please refer to the model [Training](../../modes/train.md) page. 
diff --git a/docs/en/datasets/classify/fashion-mnist.md b/docs/en/datasets/classify/fashion-mnist.md index 21373268e1..656473edf5 100644 --- a/docs/en/datasets/classify/fashion-mnist.md +++ b/docs/en/datasets/classify/fashion-mnist.md @@ -74,7 +74,7 @@ To train a CNN model on the Fashion-MNIST dataset for 100 epochs with an image s ```bash # Start training from a pretrained *.pt model - yolo detect train data=fashion-mnist model=yolov8n-cls.pt epochs=100 imgsz=28 + yolo classify train data=fashion-mnist model=yolov8n-cls.pt epochs=100 imgsz=28 ``` ## Sample Images and Annotations @@ -117,7 +117,7 @@ To train an Ultralytics YOLO model on the Fashion-MNIST dataset, you can use bot === "CLI" ```bash - yolo detect train data=fashion-mnist model=yolov8n-cls.pt epochs=100 imgsz=28 + yolo classify train data=fashion-mnist model=yolov8n-cls.pt epochs=100 imgsz=28 ``` For more detailed training parameters, refer to the [Training page](../../modes/train.md). diff --git a/docs/en/datasets/classify/imagenet.md b/docs/en/datasets/classify/imagenet.md index e563568026..53aabccef0 100644 --- a/docs/en/datasets/classify/imagenet.md +++ b/docs/en/datasets/classify/imagenet.md @@ -59,7 +59,7 @@ To train a deep learning model on the ImageNet dataset for 100 epochs with an im ```bash # Start training from a pretrained *.pt model - yolo train data=imagenet model=yolov8n-cls.pt epochs=100 imgsz=224 + yolo classify train data=imagenet model=yolov8n-cls.pt epochs=100 imgsz=224 ``` ## Sample Images and Annotations @@ -120,7 +120,7 @@ To use a pretrained Ultralytics YOLO model for image classification on the Image ```bash # Start training from a pretrained *.pt model - yolo train data=imagenet model=yolov8n-cls.pt epochs=100 imgsz=224 + yolo classify train data=imagenet model=yolov8n-cls.pt epochs=100 imgsz=224 ``` For more in-depth training instruction, refer to our [Training page](../../modes/train.md). 
diff --git a/docs/en/datasets/classify/imagenet10.md b/docs/en/datasets/classify/imagenet10.md index d7bf55e483..a079986cce 100644 --- a/docs/en/datasets/classify/imagenet10.md +++ b/docs/en/datasets/classify/imagenet10.md @@ -45,7 +45,7 @@ To test a deep learning model on the ImageNet10 dataset with an image size of 22 ```bash # Start training from a pretrained *.pt model - yolo train data=imagenet10 model=yolov8n-cls.pt epochs=5 imgsz=224 + yolo classify train data=imagenet10 model=yolov8n-cls.pt epochs=5 imgsz=224 ``` ## Sample Images and Annotations @@ -104,7 +104,7 @@ To test your deep learning model on the ImageNet10 dataset with an image size of ```bash # Start training from a pretrained *.pt model - yolo train data=imagenet10 model=yolov8n-cls.pt epochs=5 imgsz=224 + yolo classify train data=imagenet10 model=yolov8n-cls.pt epochs=5 imgsz=224 ``` Refer to the [Training](../../modes/train.md) page for a comprehensive list of available arguments. diff --git a/docs/en/datasets/classify/imagenette.md b/docs/en/datasets/classify/imagenette.md index b667192aec..9a2a128ff6 100644 --- a/docs/en/datasets/classify/imagenette.md +++ b/docs/en/datasets/classify/imagenette.md @@ -47,7 +47,7 @@ To train a model on the ImageNette dataset for 100 epochs with a standard image ```bash # Start training from a pretrained *.pt model - yolo detect train data=imagenette model=yolov8n-cls.pt epochs=100 imgsz=224 + yolo classify train data=imagenette model=yolov8n-cls.pt epochs=100 imgsz=224 ``` ## Sample Images and Annotations @@ -82,7 +82,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag ```bash # Start training from a pretrained *.pt model with ImageNette160 - yolo detect train data=imagenette160 model=yolov8n-cls.pt epochs=100 imgsz=160 + yolo classify train data=imagenette160 model=yolov8n-cls.pt epochs=100 imgsz=160 ``` !!! 
Example "Train Example with ImageNette320" @@ -103,7 +103,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag ```bash # Start training from a pretrained *.pt model with ImageNette320 - yolo detect train data=imagenette320 model=yolov8n-cls.pt epochs=100 imgsz=320 + yolo classify train data=imagenette320 model=yolov8n-cls.pt epochs=100 imgsz=320 ``` These smaller versions of the dataset allow for rapid iterations during the development process while still providing valuable and realistic image classification tasks. @@ -140,7 +140,7 @@ To train a YOLO model on the ImageNette dataset for 100 epochs, you can use the ```bash # Start training from a pretrained *.pt model - yolo detect train data=imagenette model=yolov8n-cls.pt epochs=100 imgsz=224 + yolo classify train data=imagenette model=yolov8n-cls.pt epochs=100 imgsz=224 ``` For more details, see the [Training](../../modes/train.md) documentation page. diff --git a/docs/en/datasets/classify/imagewoof.md b/docs/en/datasets/classify/imagewoof.md index 2a439425f8..5a76d97fc2 100644 --- a/docs/en/datasets/classify/imagewoof.md +++ b/docs/en/datasets/classify/imagewoof.md @@ -44,7 +44,7 @@ To train a CNN model on the ImageWoof dataset for 100 epochs with an image size ```bash # Start training from a pretrained *.pt model - yolo detect train data=imagewoof model=yolov8n-cls.pt epochs=100 imgsz=224 + yolo classify train data=imagewoof model=yolov8n-cls.pt epochs=100 imgsz=224 ``` ## Dataset Variants @@ -113,7 +113,7 @@ To train a Convolutional Neural Network (CNN) model on the ImageWoof dataset usi === "CLI" ```bash - yolo detect train data=imagewoof model=yolov8n-cls.pt epochs=100 imgsz=224 + yolo classify train data=imagewoof model=yolov8n-cls.pt epochs=100 imgsz=224 ``` For more details on available training arguments, refer to the [Training](../../modes/train.md) page. 
diff --git a/docs/en/datasets/classify/mnist.md b/docs/en/datasets/classify/mnist.md index 8f8ad1265d..ae9be2bceb 100644 --- a/docs/en/datasets/classify/mnist.md +++ b/docs/en/datasets/classify/mnist.md @@ -52,7 +52,7 @@ To train a CNN model on the MNIST dataset for 100 epochs with an image size of 3 ```bash # Start training from a pretrained *.pt model - cnn detect train data=mnist model=yolov8n-cls.pt epochs=100 imgsz=28 + yolo classify train data=mnist model=yolov8n-cls.pt epochs=100 imgsz=28 ``` ## Sample Images and Annotations @@ -113,7 +113,7 @@ To train a model on the MNIST dataset using Ultralytics YOLO, you can follow the ```bash # Start training from a pretrained *.pt model - cnn detect train data=mnist model=yolov8n-cls.pt epochs=100 imgsz=28 + yolo classify train data=mnist model=yolov8n-cls.pt epochs=100 imgsz=28 ``` For a detailed list of available training arguments, refer to the [Training](../../modes/train.md) page. diff --git a/docs/en/datasets/obb/index.md b/docs/en/datasets/obb/index.md index b0b548b0d5..f7708a10a0 100644 --- a/docs/en/datasets/obb/index.md +++ b/docs/en/datasets/obb/index.md @@ -50,7 +50,7 @@ To train a model using these OBB formats: ```bash # Train a new YOLOv8n-OBB model on the DOTAv2 dataset - yolo detect train data=DOTAv1.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=640 ``` ## Supported Datasets @@ -125,7 +125,7 @@ Training a YOLOv8 model with OBBs involves ensuring your dataset is in the YOLO ```bash # Train a new YOLOv8n-OBB model on the custom dataset - yolo detect train data=your_dataset.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo obb train data=your_dataset.yaml model=yolov8n-obb.yaml epochs=100 imgsz=640 ``` This ensures your model leverages the detailed OBB annotations for improved detection accuracy. 
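The OBB corrections above switch both the task (`yolo obb train`) and the model (`yolov8n-obb.pt`) so training consumes YOLO OBB labels, where each line holds a class index followed by four corner points normalized to image size. A minimal sketch of that label format (the helper name is mine, not an Ultralytics API):

```python
def obb_label_line(class_index, corners, img_w, img_h):
    """Format one YOLO OBB label line: class index then four normalized (x, y) corners."""
    coords = []
    for x, y in corners:
        coords.append(round(x / img_w, 6))  # normalize x by image width
        coords.append(round(y / img_h, 6))  # normalize y by image height
    return " ".join(map(str, [class_index] + coords))


# An axis-aligned 4-corner box in a 640x640 image, as a degenerate OBB
line = obb_label_line(0, [(64, 64), (192, 64), (192, 128), (64, 128)], 640, 640)
print(line)  # "0 0.1 0.1 0.3 0.1 0.3 0.2 0.1 0.2"
```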
diff --git a/docs/en/datasets/pose/coco.md b/docs/en/datasets/pose/coco.md index 589614f8b3..52fce86c03 100644 --- a/docs/en/datasets/pose/coco.md +++ b/docs/en/datasets/pose/coco.md @@ -71,7 +71,7 @@ To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 epochs with an im ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo pose train data=coco-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -133,7 +133,7 @@ Training a YOLOv8 model on the COCO-Pose dataset can be accomplished using eithe ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco-pose.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo pose train data=coco-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 ``` For more details on the training process and available arguments, check the [training page](../../modes/train.md). diff --git a/docs/en/datasets/pose/coco8-pose.md b/docs/en/datasets/pose/coco8-pose.md index f8f4d46073..49295ac483 100644 --- a/docs/en/datasets/pose/coco8-pose.md +++ b/docs/en/datasets/pose/coco8-pose.md @@ -44,7 +44,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an i ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -105,7 +105,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an i === "CLI" ```bash - yolo detect train data=coco8-pose.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 ``` For a comprehensive list of training arguments, refer to the model [Training](../../modes/train.md) page. 
diff --git a/docs/en/datasets/pose/index.md b/docs/en/datasets/pose/index.md index 29179d8c68..57c20dcb7c 100644 --- a/docs/en/datasets/pose/index.md +++ b/docs/en/datasets/pose/index.md @@ -82,7 +82,7 @@ The `train` and `val` fields specify the paths to the directories containing the ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 + yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 ``` ## Supported Datasets diff --git a/docs/en/datasets/pose/tiger-pose.md b/docs/en/datasets/pose/tiger-pose.md index ec838b5ddb..d1e338ccac 100644 --- a/docs/en/datasets/pose/tiger-pose.md +++ b/docs/en/datasets/pose/tiger-pose.md @@ -126,7 +126,7 @@ To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 epochs with an i ```bash # Start training from a pretrained *.pt model - yolo task=pose mode=train data=tiger-pose.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo task=pose mode=train data=tiger-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640 ``` ### What configurations does the `tiger-pose.yaml` file include? 
diff --git a/docs/en/datasets/segment/coco.md b/docs/en/datasets/segment/coco.md index dd77b2102c..e02b677115 100644 --- a/docs/en/datasets/segment/coco.md +++ b/docs/en/datasets/segment/coco.md @@ -69,7 +69,7 @@ To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an imag ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=coco-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -131,7 +131,7 @@ To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an imag ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco-seg.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo segment train data=coco-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 ``` ### What are the key features of the COCO-Seg dataset? diff --git a/docs/en/datasets/segment/coco8-seg.md b/docs/en/datasets/segment/coco8-seg.md index 55006b1712..bcca4a2641 100644 --- a/docs/en/datasets/segment/coco8-seg.md +++ b/docs/en/datasets/segment/coco8-seg.md @@ -44,7 +44,7 @@ To train a YOLOv8n-seg model on the COCO8-Seg dataset for 100 epochs with an ima ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 ``` ## Sample Images and Annotations @@ -106,7 +106,7 @@ To train a **YOLOv8n-seg** model on the COCO8-Seg dataset for 100 epochs with an ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco8-seg.yaml model=yolov8n.pt epochs=100 imgsz=640 + yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 ``` For a thorough explanation of available arguments and configuration options, you can check the [Training](../../modes/train.md) documentation. 
diff --git a/docs/en/datasets/segment/index.md b/docs/en/datasets/segment/index.md index 27cb43741f..f9228c0812 100644 --- a/docs/en/datasets/segment/index.md +++ b/docs/en/datasets/segment/index.md @@ -84,7 +84,7 @@ The `train` and `val` fields specify the paths to the directories containing the ```bash # Start training from a pretrained *.pt model - yolo detect train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 + yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640 ``` ## Supported Datasets diff --git a/docs/en/models/sam-2.md b/docs/en/models/sam-2.md index 001bd89b60..ef76f8a3cf 100644 --- a/docs/en/models/sam-2.md +++ b/docs/en/models/sam-2.md @@ -6,10 +6,6 @@ keywords: SAM 2, Segment Anything, video segmentation, image segmentation, promp # SAM 2: Segment Anything Model 2 -!!! Note "🚧 SAM 2 Integration In Progress 🚧" - - The SAM 2 features described in this documentation are currently not enabled in the `ultralytics` package. The Ultralytics team is actively working on integrating SAM 2, and these capabilities should be available soon. We appreciate your patience as we work to implement this exciting new model. - SAM 2, the successor to Meta's [Segment Anything Model (SAM)](sam.md), is a cutting-edge tool designed for comprehensive object segmentation in both images and videos. It excels in handling complex visual data through a unified, promptable model architecture that supports real-time processing and zero-shot generalization. ![SAM 2 Example Results](https://github.com/facebookresearch/segment-anything-2/raw/main/assets/sa_v_dataset.jpg?raw=true) @@ -105,10 +101,6 @@ pip install ultralytics ## How to Use SAM 2: Versatility in Image and Video Segmentation -!!! Note "🚧 SAM 2 Integration In Progress 🚧" - - The SAM 2 features described in this documentation are currently not enabled in the `ultralytics` package. 
The Ultralytics team is actively working on integrating SAM 2, and these capabilities should be available soon. We appreciate your patience as we work to implement this exciting new model. - The following table details the available SAM 2 models, their pre-trained weights, supported tasks, and compatibility with different operating modes like [Inference](../modes/predict.md), [Validation](../modes/val.md), [Training](../modes/train.md), and [Export](../modes/export.md). | Model Type | Pre-trained Weights | Tasks Supported | Inference | Validation | Training | Export | diff --git a/docs/en/quickstart.md b/docs/en/quickstart.md index 93ccaaa3e7..6a5a21dba4 100644 --- a/docs/en/quickstart.md +++ b/docs/en/quickstart.md @@ -162,8 +162,8 @@ The Ultralytics command line interface (CLI) allows for simple single-line comma yolo TASK MODE ARGS ``` - - `TASK` (optional) is one of ([detect](tasks/detect.md), [segment](tasks/segment.md), [classify](tasks/classify.md), [pose](tasks/pose.md)) - - `MODE` (required) is one of ([train](modes/train.md), [val](modes/val.md), [predict](modes/predict.md), [export](modes/export.md), [track](modes/track.md)) + - `TASK` (optional) is one of ([detect](tasks/detect.md), [segment](tasks/segment.md), [classify](tasks/classify.md), [pose](tasks/pose.md), [obb](tasks/obb.md)) + - `MODE` (required) is one of ([train](modes/train.md), [val](modes/val.md), [predict](modes/predict.md), [export](modes/export.md), [track](modes/track.md), [benchmark](modes/benchmark.md)) - `ARGS` (optional) are `arg=value` pairs like `imgsz=640` that override defaults. See all `ARGS` in the full [Configuration Guide](usage/cfg.md) or with the `yolo cfg` CLI command. diff --git a/docs/en/reference/data/converter.md b/docs/en/reference/data/converter.md index d1b63f2f0e..2854e7d228 100644 --- a/docs/en/reference/data/converter.md +++ b/docs/en/reference/data/converter.md @@ -23,6 +23,10 @@ keywords: Ultralytics, data conversion, YOLO models, COCO, DOTA, YOLO bbox2segme



+## ::: ultralytics.data.converter.convert_segment_masks_to_yolo_seg
+
 ## ::: ultralytics.data.converter.convert_dota_to_yolo_obb

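The `convert_segment_masks_to_yolo_seg` entry referenced above maps mask pixel values to class indices (pixel value `v` maps to class `v - 1`, with 0 reserved for background) and writes polygons whose coordinates are normalized to image size and rounded to six decimals. A dependency-free sketch of those two steps (function names are mine, for illustration only):

```python
def mask_values_to_classes(unique_values, classes):
    """Map mask pixel values to class indices: value v (1..classes) -> class v - 1; 0 is background."""
    mapping = {i + 1: i for i in range(classes)}
    return [mapping[v] for v in unique_values if v != 0 and v in mapping]


def normalize_polygon(points, img_w, img_h):
    """Flatten pixel (x, y) points into normalized coordinates rounded to 6 decimal places."""
    out = []
    for x, y in points:
        out += [round(x / img_w, 6), round(y / img_h, 6)]
    return out


print(mask_values_to_classes([0, 1, 3, 200], 80))  # [0, 2] -- value 200 has no class, 0 is skipped
print(normalize_polygon([(64, 32)], 640, 320))  # [0.1, 0.1]
```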
diff --git a/docs/en/usage/cli.md b/docs/en/usage/cli.md index c1c221ffca..b78b031ab0 100644 --- a/docs/en/usage/cli.md +++ b/docs/en/usage/cli.md @@ -27,8 +27,8 @@ The YOLO command line interface (CLI) allows for simple single-line commands wit ```bash yolo TASK MODE ARGS - Where TASK (optional) is one of [detect, segment, classify] - MODE (required) is one of [train, val, predict, export, track] + Where TASK (optional) is one of [detect, segment, classify, pose, obb] + MODE (required) is one of [train, val, predict, export, track, benchmark] ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults. ``` See all ARGS in the full [Configuration Guide](cfg.md) or with `yolo cfg` @@ -75,8 +75,8 @@ The YOLO command line interface (CLI) allows for simple single-line commands wit Where: -- `TASK` (optional) is one of `[detect, segment, classify]`. If it is not passed explicitly YOLOv8 will try to guess the `TASK` from the model type. -- `MODE` (required) is one of `[train, val, predict, export, track]` +- `TASK` (optional) is one of `[detect, segment, classify, pose, obb]`. If it is not passed explicitly YOLOv8 will try to guess the `TASK` from the model type. +- `MODE` (required) is one of `[train, val, predict, export, track, benchmark]` - `ARGS` (optional) are any number of custom `arg=value` pairs like `imgsz=320` that override defaults. For a full list of available `ARGS` see the [Configuration](cfg.md) page and `defaults.yaml` !!! Warning "Warning" diff --git a/docs/en/usage/simple-utilities.md b/docs/en/usage/simple-utilities.md index 99206eb202..694ecf1dfe 100644 --- a/docs/en/usage/simple-utilities.md +++ b/docs/en/usage/simple-utilities.md @@ -51,6 +51,22 @@ auto_annotate( # (1)! 
- Use in combination with the [function `segments2boxes`](#convert-segments-to-bounding-boxes) to generate object detection bounding boxes as well +### Convert Segmentation Masks into YOLO Format + +![Segmentation Masks to YOLO Format](https://github.com/user-attachments/assets/1a823fc1-f3a1-4dd5-83e7-0b209df06fc3) + +Use to convert a dataset of segmentation mask images to the `YOLO` segmentation format. +This function takes the directory containing the binary format mask images and converts them into YOLO segmentation format. + +The converted masks will be saved in the specified output directory. + +```python +from ultralytics.data.converter import convert_segment_masks_to_yolo_seg + +# For COCO dataset we have 80 classes +convert_segment_masks_to_yolo_seg(masks_dir="path/to/masks_dir", output_dir="path/to/output_dir", classes=80) +``` + ### Convert COCO into YOLO Format Use to convert COCO JSON annotations into proper YOLO format. For object detection (bounding box) datasets, `use_segments` and `use_keypoints` should both be `False` diff --git a/docs/mkdocs_github_authors.yaml b/docs/mkdocs_github_authors.yaml index 745070b16c..49d50284e1 100644 --- a/docs/mkdocs_github_authors.yaml +++ b/docs/mkdocs_github_authors.yaml @@ -1,3 +1,4 @@ +116908874+jk4e@users.noreply.github.com: jk4e 1185102784@qq.com: Laughing-q 130829914+IvorZhu331@users.noreply.github.com: IvorZhu331 135830346+UltralyticsAssistant@users.noreply.github.com: UltralyticsAssistant diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index affb8e35c7..8eb9bc7812 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = "8.2.70" +__version__ = "8.2.71" import os diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py index 0ee390877d..34b359d61f 100644 --- a/ultralytics/data/converter.py +++ b/ultralytics/data/converter.py @@ -334,6 +334,87 @@ def convert_coco( LOGGER.info(f"{'LVIS' if lvis else 
'COCO'} data converted successfully.\nResults saved to {save_dir.resolve()}")


+def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
+    """
+    Converts a dataset of segmentation mask images to the YOLO segmentation format.
+
+    This function takes the directory containing the binary format mask images and converts them into YOLO
+    segmentation format. The converted masks are saved in the specified output directory.
+
+    Args:
+        masks_dir (str): The path to the directory where all mask images (png, jpg) are stored.
+        output_dir (str): The path to the directory where the converted YOLO segmentation masks will be stored.
+        classes (int): Total classes in the dataset, i.e. for COCO classes=80.
+
+    Example:
+        ```python
+        from ultralytics.data.converter import convert_segment_masks_to_yolo_seg
+
+        # For the COCO dataset, we have 80 classes
+        convert_segment_masks_to_yolo_seg("path/to/masks_directory", "path/to/output/directory", classes=80)
+        ```
+
+    Notes:
+        The expected directory structure for the masks is:
+
+        - masks
+            ├─ mask_image_01.png or mask_image_01.jpg
+            ├─ mask_image_02.png or mask_image_02.jpg
+            ├─ mask_image_03.png or mask_image_03.jpg
+            └─ mask_image_04.png or mask_image_04.jpg
+
+        After execution, the labels will be organized in the following structure:
+
+        - output_dir
+            ├─ mask_yolo_01.txt
+            ├─ mask_yolo_02.txt
+            ├─ mask_yolo_03.txt
+            └─ mask_yolo_04.txt
+    """
+    import os
+
+    os.makedirs(output_dir, exist_ok=True)  # Ensure the output directory exists before writing labels
+    pixel_to_class_mapping = {i + 1: i for i in range(classes)}  # Use the classes argument, not a hardcoded 80
+    for mask_filename in os.listdir(masks_dir):
+        if mask_filename.endswith((".png", ".jpg")):  # Accept both formats listed in the docstring
+            mask_path = os.path.join(masks_dir, mask_filename)
+            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)  # Read the mask image in grayscale
+            img_height, img_width = mask.shape  # Get image dimensions
+            LOGGER.info(f"Processing {mask_path} imgsz = {img_height} x {img_width}")
+
+            unique_values = np.unique(mask)  # Get unique pixel values representing different classes
+            yolo_format_data = []
+
+            for value in unique_values:
+                if value == 0:
+                    continue  # Skip background
+                class_index = pixel_to_class_mapping.get(value, -1)
+                if class_index == -1:
+                    LOGGER.warning(f"Unknown class for pixel value {value} in file {mask_filename}, skipping.")
+                    continue
+
+                # Create a binary mask for the current class and find contours
+                contours, _ = cv2.findContours(
+                    (mask == value).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+                )  # Find contours
+
+                for contour in contours:
+                    if len(contour) >= 3:  # YOLO requires at least 3 points for a valid segmentation
+                        contour = contour.squeeze()  # Remove single-dimensional entries
+                        yolo_format = [class_index]
+                        for point in contour:
+                            # Normalize the coordinates
+                            yolo_format.append(round(point[0] / img_width, 6))  # Rounding to 6 decimal places
+                            yolo_format.append(round(point[1] / img_height, 6))
+                        yolo_format_data.append(yolo_format)
+            # Save Ultralytics YOLO format data to file
+            output_path = os.path.join(output_dir, os.path.splitext(mask_filename)[0] + ".txt")
+            with open(output_path, "w") as file:
+                for item in yolo_format_data:
+                    line = " ".join(map(str, item))
+                    file.write(line + "\n")
+            LOGGER.info(f"Processed and stored at {output_path} imgsz = {img_height} x {img_width}")
+
+
 def convert_dota_to_yolo_obb(dota_root_path: str):
     """
     Converts DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format.
diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py
index 48e95679cf..47063466ad 100644
--- a/ultralytics/engine/trainer.py
+++ b/ultralytics/engine/trainer.py
@@ -26,6 +26,7 @@ from ultralytics.data.utils import check_cls_dataset, check_det_dataset
 from ultralytics.nn.tasks import attempt_load_one_weight, attempt_load_weights
 from ultralytics.utils import (
     DEFAULT_CFG,
+    LOCAL_RANK,
     LOGGER,
     RANK,
     TQDM,
@@ -129,7 +130,7 @@ class BaseTrainer:
 
         # Model and Dataset
         self.model = check_model_file_from_stem(self.args.model)  # add suffix, i.e.
yolov8n -> yolov8n.pt - with torch_distributed_zero_first(RANK): # avoid auto-downloading dataset multiple times + with torch_distributed_zero_first(LOCAL_RANK): # avoid auto-downloading dataset multiple times self.trainset, self.testset = self.get_dataset() self.ema = None @@ -285,7 +286,7 @@ class BaseTrainer: # Dataloaders batch_size = self.batch_size // max(world_size, 1) - self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=RANK, mode="train") + self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=LOCAL_RANK, mode="train") if RANK in {-1, 0}: # Note: When training DOTA dataset, double batch size could get OOM on images with >2000 objects. self.test_loader = self.get_dataloader( diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py index 8a2765c98f..4a40a88291 100644 --- a/ultralytics/engine/validator.py +++ b/ultralytics/engine/validator.py @@ -136,8 +136,8 @@ class BaseValidator: if engine: self.args.batch = model.batch_size elif not pt and not jit: - self.args.batch = 1 # export.py models default to batch-size 1 - LOGGER.info(f"Forcing batch=1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models") + self.args.batch = model.metadata.get("batch", 1) # export.py models default to batch-size 1 + LOGGER.info(f"Setting batch={self.args.batch} input of shape ({self.args.batch}, 3, {imgsz}, {imgsz})") if str(self.args.data).split(".")[-1] in {"yaml", "yml"}: self.data = check_det_dataset(self.args.data) diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py index cf119579cc..82f0465eab 100644 --- a/ultralytics/utils/__init__.py +++ b/ultralytics/utils/__init__.py @@ -50,7 +50,7 @@ PYTHON_VERSION = platform.python_version() TORCH_VERSION = torch.__version__ TORCHVISION_VERSION = importlib.metadata.version("torchvision") # faster than importing torchvision HELP_MSG = """ - Usage examples for running YOLOv8: + Usage examples for running Ultralytics YOLO: 
1. Install the ultralytics package: @@ -61,25 +61,25 @@ HELP_MSG = """ from ultralytics import YOLO # Load a model - model = YOLO('yolov8n.yaml') # build a new model from scratch + model = YOLO("yolov8n.yaml") # build a new model from scratch model = YOLO("yolov8n.pt") # load a pretrained model (recommended for training) # Use the model results = model.train(data="coco8.yaml", epochs=3) # train the model results = model.val() # evaluate model performance on the validation set - results = model('https://ultralytics.com/images/bus.jpg') # predict on an image - success = model.export(format='onnx') # export the model to ONNX format + results = model("https://ultralytics.com/images/bus.jpg") # predict on an image + success = model.export(format="onnx") # export the model to ONNX format 3. Use the command line interface (CLI): - YOLOv8 'yolo' CLI commands use the following syntax: + Ultralytics 'yolo' CLI commands use the following syntax: yolo TASK MODE ARGS - Where TASK (optional) is one of [detect, segment, classify] - MODE (required) is one of [train, val, predict, export] - ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults. - See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg' + Where TASK (optional) is one of [detect, segment, classify, pose, obb] + MODE (required) is one of [train, val, predict, export, benchmark] + ARGS (optional) are any number of custom "arg=value" pairs like "imgsz=320" that override defaults. 
+ See all ARGS at https://docs.ultralytics.com/usage/cfg or with "yolo cfg" - Train a detection model for 10 epochs with an initial learning_rate of 0.01 yolo detect train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01 diff --git a/ultralytics/utils/plotting.py b/ultralytics/utils/plotting.py index f000a8c50a..74ca940cfd 100644 --- a/ultralytics/utils/plotting.py +++ b/ultralytics/utils/plotting.py @@ -280,7 +280,7 @@ class Annotator: Args: box (tuple): The bounding box coordinates (x1, y1, x2, y2). label (str): The text label to be displayed. - color (tuple, optional): The background color of the rectangle (R, G, B). + color (tuple, optional): The background color of the rectangle (B, G, R). txt_color (tuple, optional): The color of the text (R, G, B). rotated (bool, optional): Variable used to check if task is OBB """ diff --git a/ultralytics/utils/torch_utils.py b/ultralytics/utils/torch_utils.py index 624167694f..fd24403d5a 100644 --- a/ultralytics/utils/torch_utils.py +++ b/ultralytics/utils/torch_utils.py @@ -48,11 +48,12 @@ TORCHVISION_0_18 = check_version(TORCHVISION_VERSION, "0.18.0") def torch_distributed_zero_first(local_rank: int): """Ensures all processes in distributed training wait for the local master (rank 0) to complete a task first.""" initialized = dist.is_available() and dist.is_initialized() + if initialized and local_rank not in {-1, 0}: dist.barrier(device_ids=[local_rank]) yield if initialized and local_rank == 0: - dist.barrier(device_ids=[0]) + dist.barrier(device_ids=[local_rank]) def smart_inference_mode():
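The `RANK` → `LOCAL_RANK` change in `trainer.py` and `torch_distributed_zero_first` makes the first process *on each node* act as the local master, so a multi-node job downloads the dataset once per node-local filesystem instead of only on global rank 0 (leaving other nodes without data). A toy illustration of the rank layout this relies on (the 2-node × 4-GPU numbers are hypothetical):

```python
def local_rank(global_rank, gpus_per_node):
    """Under torchrun, LOCAL_RANK is the process index within its own node."""
    return global_rank % gpus_per_node


# 2 nodes x 4 GPUs: global ranks 0-7
layout = {r: local_rank(r, 4) for r in range(8)}
downloaders = [r for r, lr in layout.items() if lr == 0]
print(downloaders)  # [0, 4] -- one local master per node
```

With global `RANK` only rank 0 would qualify, so node 2 would never fetch the dataset to its own disk.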