`ultralytics 8.0.106` (#2736)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: vyskocj <whiskey1939@seznam.cz>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: triple Mu <gpu@163.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Laughing <61612323+Laughing-q@users.noreply.github.com>
pull/2669/head^2 v8.0.106
Glenn Jocher 2 years ago committed by GitHub
parent 23fc50641c
commit 4db686a315
41 changed files (change counts in parentheses):

1. .github/workflows/ci.yaml (34)
2. .github/workflows/publish.yml (4)
3. docker/Dockerfile (2)
4. docs/datasets/classify/caltech101.md (75)
5. docs/datasets/classify/caltech256.md (70)
6. docs/datasets/classify/cifar10.md (72)
7. docs/datasets/classify/cifar100.md (72)
8. docs/datasets/classify/fashion-mnist.md (75)
9. docs/datasets/classify/imagenet.md (77)
10. docs/datasets/classify/imagenet10.md (74)
11. docs/datasets/classify/imagenette.md (109)
12. docs/datasets/classify/imagewoof.md (80)
13. docs/datasets/classify/mnist.md (2)
14. docs/datasets/pose/coco.md (87)
15. docs/datasets/pose/coco8-pose.md (4)
16. docs/datasets/segment/coco.md (86)
17. docs/datasets/segment/coco8-seg.md (4)
18. docs/help/CI.md (34)
19. docs/help/index.md (1)
20. docs/hub/inference_api.md (5)
21. docs/models/yolov3.md (74)
22. docs/models/yolov5.md (50)
23. docs/models/yolov8.md (39)
24. docs/tasks/classify.md (18)
25. docs/tasks/detect.md (4)
26. docs/tasks/pose.md (8)
27. docs/tasks/segment.md (8)
28. docs/yolov5/tutorials/train_custom_data.md (2)
29. mkdocs.yml (1)
30. ultralytics/__init__.py (2)
31. ultralytics/nn/modules/head.py (2)
32. ultralytics/vit/rtdetr/predict.py (8)
33. ultralytics/yolo/data/dataset.py (2)
34. ultralytics/yolo/engine/trainer.py (12)
35. ultralytics/yolo/utils/__init__.py (24)
36. ultralytics/yolo/utils/ops.py (2)
37. ultralytics/yolo/utils/patches.py (45)
38. ultralytics/yolo/utils/torch_utils.py (8)
39. ultralytics/yolo/utils/tuner.py (2)
40. ultralytics/yolo/v8/detect/val.py (15)
41. ultralytics/yolo/v8/pose/val.py (17)

@@ -158,38 +158,6 @@ jobs:
python --version
pip --version
pip list
- name: Test Detect
shell: bash # for Windows compatibility
run: |
yolo detect train data=coco8.yaml model=yolov8n.yaml epochs=1 imgsz=32
yolo detect train data=coco8.yaml model=yolov8n.pt epochs=1 imgsz=32
yolo detect val data=coco8.yaml model=runs/detect/train/weights/last.pt imgsz=32
yolo detect predict model=runs/detect/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
yolo export model=runs/detect/train/weights/last.pt imgsz=32 format=torchscript
- name: Test Segment
shell: bash # for Windows compatibility
run: |
yolo segment train data=coco8-seg.yaml model=yolov8n-seg.yaml epochs=1 imgsz=32
yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=1 imgsz=32
yolo segment val data=coco8-seg.yaml model=runs/segment/train/weights/last.pt imgsz=32
yolo segment predict model=runs/segment/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
yolo export model=runs/segment/train/weights/last.pt imgsz=32 format=torchscript
- name: Test Classify
shell: bash # for Windows compatibility
run: |
yolo classify train data=imagenet10 model=yolov8n-cls.yaml epochs=1 imgsz=32
yolo classify train data=imagenet10 model=yolov8n-cls.pt epochs=1 imgsz=32
yolo classify val data=imagenet10 model=runs/classify/train/weights/last.pt imgsz=32
yolo classify predict model=runs/classify/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
yolo export model=runs/classify/train/weights/last.pt imgsz=32 format=torchscript
- name: Test Pose
shell: bash # for Windows compatibility
run: |
yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml epochs=1 imgsz=32
yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=1 imgsz=32
yolo pose val data=coco8-pose.yaml model=runs/pose/train/weights/last.pt imgsz=32
yolo pose predict model=runs/pose/train/weights/last.pt imgsz=32 source=ultralytics/assets/bus.jpg
yolo export model=runs/pose/train/weights/last.pt imgsz=32 format=torchscript
- name: Pytest tests
shell: bash # for Windows compatibility
run: pytest tests
@@ -201,7 +169,7 @@ jobs:
steps:
- name: Check for failure and notify
if: (needs.HUB.result == 'failure' || needs.Benchmarks.result == 'failure' || needs.Tests.result == 'failure') && github.repository == 'ultralytics/ultralytics' && (github.event_name == 'schedule' || github.event_name == 'push')
uses: slackapi/slack-github-action@v1.23.0
uses: slackapi/slack-github-action@v1.24.0
with:
payload: |
{"text": "<!channel> GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n"}

@@ -94,7 +94,7 @@ jobs:
echo "PR_TITLE=$PR_TITLE" >> $GITHUB_ENV
- name: Notify on Slack (Success)
if: success() && github.event_name == 'push' && steps.check_pypi.outputs.increment == 'True'
uses: slackapi/slack-github-action@v1.23.0
uses: slackapi/slack-github-action@v1.24.0
with:
payload: |
{"text": "<!channel> GitHub Actions success for ${{ github.workflow }} ✅\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* NEW 'ultralytics ${{ steps.check_pypi.outputs.version }}' pip package published 😃\n*Job Status:* ${{ job.status }}\n*Pull Request:* <https://github.com/${{ github.repository }}/pull/${{ env.PR_NUMBER }}> ${{ env.PR_TITLE }}\n"}
@@ -102,7 +102,7 @@ jobs:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_YOLO }}
- name: Notify on Slack (Failure)
if: failure()
uses: slackapi/slack-github-action@v1.23.0
uses: slackapi/slack-github-action@v1.24.0
with:
payload: |
{"text": "<!channel> GitHub Actions error for ${{ github.workflow }} ❌\n\n\n*Repository:* https://github.com/${{ github.repository }}\n*Action:* https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\n*Author:* ${{ github.actor }}\n*Event:* ${{ github.event_name }}\n*Job Status:* ${{ job.status }}\n*Pull Request:* <https://github.com/${{ github.repository }}/pull/${{ env.PR_NUMBER }}> ${{ env.PR_TITLE }}\n"}

@@ -29,7 +29,7 @@ ADD https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt /u
# Install pip packages
RUN python3 -m pip install --upgrade pip wheel
RUN pip install --no-cache -e . albumentations comet tensorboard
RUN pip install --no-cache -e . albumentations comet tensorboard thop
# Set environment variables
ENV OMP_NUM_THREADS=1

@@ -1,7 +1,78 @@
---
comments: true
description: Learn about the Caltech-101 dataset, a collection of images for object recognition tasks in machine learning and computer vision algorithms.
---
# 🚧 Page Under Construction ⚒
# Caltech-101 Dataset
This page is currently under construction! 👷Please check back later for updates. 😃🔜
The [Caltech-101](https://data.caltech.edu/records/mzrjq-6wc02) dataset is a widely used dataset for object recognition tasks, containing around 9,000 images from 101 object categories. The categories were chosen to reflect a variety of real-world objects, and the images themselves were carefully selected and annotated to provide a challenging benchmark for object recognition algorithms.
## Key Features
- The Caltech-101 dataset comprises around 9,000 color images divided into 101 categories.
- The categories encompass a wide variety of objects, including animals, vehicles, household items, and people.
- The number of images per category varies, with about 40 to 800 images in each category.
- Images are of variable sizes, with most images being medium resolution.
- Caltech-101 is widely used for training and testing in the field of machine learning, particularly for object recognition tasks.
## Dataset Structure
Unlike many other datasets, the Caltech-101 dataset is not formally split into training and testing sets. Users typically create their own splits based on their specific needs. However, a common practice is to use a random subset of images for training (e.g., 30 images per category) and the remaining images for testing.
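As a rough illustration of the split described above, the short script below copies 30 images per category into a training folder and the rest into a test folder. This is only a sketch: it assumes the dataset has already been downloaded and extracted into per-category subfolders under a hypothetical `caltech101/` directory.
```python
import random
import shutil
from pathlib import Path

root = Path('caltech101')       # hypothetical folder containing one subfolder per category
out = Path('caltech101_split')  # destination for the train/test split
n_train = 30                    # a common choice: 30 training images per category

for class_dir in sorted(p for p in root.iterdir() if p.is_dir()):
    images = sorted(class_dir.glob('*.jpg'))
    random.shuffle(images)
    for split, subset in (('train', images[:n_train]), ('test', images[n_train:])):
        dest = out / split / class_dir.name
        dest.mkdir(parents=True, exist_ok=True)
        for img in subset:
            shutil.copy(img, dest / img.name)
```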
## Applications
The Caltech-101 dataset is extensively used for training and evaluating deep learning models in object recognition tasks, such as Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other machine learning algorithms. Its wide variety of categories and high-quality images make it an excellent dataset for research and development in the field of machine learning and computer vision.
## Usage
To train a YOLO model on the Caltech-101 dataset for 100 epochs, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='caltech101', epochs=100, imgsz=416)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
yolo classify train data=caltech101 model=yolov8n-cls.pt epochs=100 imgsz=416
```
## Sample Images and Annotations
The Caltech-101 dataset contains high-quality color images of various objects, providing a well-structured dataset for object recognition tasks. Here are some examples of images from the dataset:
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239366386-44171121-b745-4206-9b59-a3be41e16089.png)
The example showcases the variety and complexity of the objects in the Caltech-101 dataset, emphasizing the significance of a diverse dataset for training robust object recognition models.
## Citations and Acknowledgments
If you use the Caltech-101 dataset in your research or development work, please cite the following paper:
```bibtex
@article{fei2007learning,
title={Learning generative visual models from few training examples: An incremental Bayesian approach tested on 101 object categories},
author={Fei-Fei, Li and Fergus, Rob and Perona, Pietro},
journal={Computer Vision and Image Understanding},
volume={106},
number={1},
pages={59--70},
year={2007},
publisher={Elsevier}
}
```
We would like to acknowledge Li Fei-Fei, Rob Fergus, and Pietro Perona for creating and maintaining the Caltech-101 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the Caltech-101 dataset and its creators, visit the [Caltech-101 dataset website](https://data.caltech.edu/records/mzrjq-6wc02).

@@ -1,7 +1,73 @@
---
comments: true
description: Learn about the Caltech-256 dataset, a broad collection of images used for object classification tasks in machine learning and computer vision algorithms.
---
# 🚧 Page Under Construction ⚒
# Caltech-256 Dataset
This page is currently under construction! 👷Please check back later for updates. 😃🔜
The [Caltech-256](https://data.caltech.edu/records/nyy15-4j048) dataset is an extensive collection of images used for object classification tasks. It contains around 30,000 images divided into 257 categories (256 object categories and 1 background category). The images are carefully curated and annotated to provide a challenging and diverse benchmark for object recognition algorithms.
## Key Features
- The Caltech-256 dataset comprises around 30,000 color images divided into 257 categories.
- Each category contains a minimum of 80 images.
- The categories encompass a wide variety of real-world objects, including animals, vehicles, household items, and people.
- Images are of variable sizes and resolutions.
- Caltech-256 is widely used for training and testing in the field of machine learning, particularly for object recognition tasks.
## Dataset Structure
Like Caltech-101, the Caltech-256 dataset does not have a formal split between training and testing sets. Users typically create their own splits according to their specific needs. A common practice is to use a random subset of images for training and the remaining images for testing.
## Applications
The Caltech-256 dataset is extensively used for training and evaluating deep learning models in object recognition tasks, such as Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other machine learning algorithms. Its diverse set of categories and high-quality images make it an invaluable dataset for research and development in the field of machine learning and computer vision.
## Usage
To train a YOLO model on the Caltech-256 dataset for 100 epochs, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='caltech256', epochs=100, imgsz=416)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
yolo classify train data=caltech256 model=yolov8n-cls.pt epochs=100 imgsz=416
```
## Sample Images and Annotations
The Caltech-256 dataset contains high-quality color images of various objects, providing a comprehensive dataset for object recognition tasks. Here are some examples of images from the dataset ([credit](https://ml4a.github.io/demos/tsne_viewer.html)):
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239365061-1e5f7857-b1e8-44ca-b3d7-d0befbcd33f9.jpg)
The example showcases the diversity and complexity of the objects in the Caltech-256 dataset, emphasizing the importance of a varied dataset for training robust object recognition models.
## Citations and Acknowledgments
If you use the Caltech-256 dataset in your research or development work, please cite the following paper:
```bibtex
@article{griffin2007caltech,
title={Caltech-256 object category dataset},
author={Griffin, Gregory and Holub, Alex and Perona, Pietro},
year={2007}
}
```
We would like to acknowledge Gregory Griffin, Alex Holub, and Pietro Perona for creating and maintaining the Caltech-256 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the Caltech-256 dataset and its creators, visit the [Caltech-256 dataset website](https://data.caltech.edu/records/nyy15-4j048).

@@ -1,7 +1,75 @@
---
comments: true
description: Learn about the CIFAR-10 dataset, a collection of images that are commonly used to train machine learning and computer vision algorithms.
---
# 🚧 Page Under Construction ⚒
# CIFAR-10 Dataset
This page is currently under construction! 👷Please check back later for updates. 😃🔜
The [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) (Canadian Institute For Advanced Research) dataset is a collection of images used widely for machine learning and computer vision algorithms. It was developed by researchers at the CIFAR institute and consists of 60,000 32x32 color images in 10 different classes.
## Key Features
- The CIFAR-10 dataset consists of 60,000 images, divided into 10 classes.
- Each class contains 6,000 images, split into 5,000 for training and 1,000 for testing.
- The images are colored and of size 32x32 pixels.
- The 10 different classes represent airplanes, cars, birds, cats, deer, dogs, frogs, horses, ships, and trucks.
- CIFAR-10 is commonly used for training and testing in the field of machine learning and computer vision.
## Dataset Structure
The CIFAR-10 dataset is split into two subsets:
1. **Training Set**: This subset contains 50,000 images used for training machine learning models.
2. **Testing Set**: This subset consists of 10,000 images used for testing and benchmarking the trained models.
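If you want to verify this split locally, CIFAR-10 can be downloaded and loaded with torchvision. This is purely illustrative and assumes torchvision is installed; it is not required for training with Ultralytics.
```python
from torchvision import datasets

# Download (if needed) and load both subsets
train_set = datasets.CIFAR10(root='./data', train=True, download=True)
test_set = datasets.CIFAR10(root='./data', train=False, download=True)

print(len(train_set), len(test_set))  # 50000 10000
print(train_set.classes)              # ['airplane', 'automobile', 'bird', ...]
```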
## Applications
The CIFAR-10 dataset is widely used for training and evaluating deep learning models in image classification tasks, such as Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other machine learning algorithms. The diversity of the dataset in terms of classes and the presence of color images make it a well-rounded dataset for research and development in the field of machine learning and computer vision.
## Usage
To train a YOLO model on the CIFAR-10 dataset for 100 epochs with an image size of 32x32, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='cifar10', epochs=100, imgsz=32)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
yolo classify train data=cifar10 model=yolov8n-cls.pt epochs=100 imgsz=32
```
## Sample Images and Annotations
The CIFAR-10 dataset contains color images of various objects, providing a well-structured dataset for image classification tasks. Here are some examples of images from the dataset:
![Dataset sample image](https://miro.medium.com/max/1100/1*SZnidBt7CQ4Xqcag6rd8Ew.png)
The example showcases the variety and complexity of the objects in the CIFAR-10 dataset, highlighting the importance of a diverse dataset for training robust image classification models.
## Citations and Acknowledgments
If you use the CIFAR-10 dataset in your research or development work, please cite the following paper:
```bibtex
@TECHREPORT{Krizhevsky09learningmultiple,
author = {Alex Krizhevsky},
title = {Learning multiple layers of features from tiny images},
institution = {},
year = {2009}
}
```
We would like to acknowledge Alex Krizhevsky for creating and maintaining the CIFAR-10 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the CIFAR-10 dataset and its creator, visit the [CIFAR-10 dataset website](https://www.cs.toronto.edu/~kriz/cifar.html).

@@ -1,7 +1,75 @@
---
comments: true
description: Learn about the CIFAR-100 dataset, a collection of images that are commonly used to train machine learning and computer vision algorithms.
---
# 🚧 Page Under Construction ⚒
# CIFAR-100 Dataset
This page is currently under construction! 👷Please check back later for updates. 😃🔜
The [CIFAR-100](https://www.cs.toronto.edu/~kriz/cifar.html) (Canadian Institute For Advanced Research) dataset is a significant extension of the CIFAR-10 dataset, composed of 60,000 32x32 color images in 100 different classes. It was developed by researchers at the CIFAR institute, offering a more challenging dataset for more complex machine learning and computer vision tasks.
## Key Features
- The CIFAR-100 dataset consists of 60,000 images, divided into 100 classes.
- Each class contains 600 images, split into 500 for training and 100 for testing.
- The images are colored and of size 32x32 pixels.
- The 100 different classes are grouped into 20 coarse categories for higher level classification.
- CIFAR-100 is commonly used for training and testing in the field of machine learning and computer vision.
## Dataset Structure
The CIFAR-100 dataset is split into two subsets:
1. **Training Set**: This subset contains 50,000 images used for training machine learning models.
2. **Testing Set**: This subset consists of 10,000 images used for testing and benchmarking the trained models.
## Applications
The CIFAR-100 dataset is extensively used for training and evaluating deep learning models in image classification tasks, such as Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other machine learning algorithms. The diversity of the dataset in terms of classes and the presence of color images make it a more challenging and comprehensive dataset for research and development in the field of machine learning and computer vision.
## Usage
To train a YOLO model on the CIFAR-100 dataset for 100 epochs with an image size of 32x32, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='cifar100', epochs=100, imgsz=32)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
yolo classify train data=cifar100 model=yolov8n-cls.pt epochs=100 imgsz=32
```
## Sample Images and Annotations
The CIFAR-100 dataset contains color images of various objects, providing a well-structured dataset for image classification tasks. Here are some examples of images from the dataset:
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239363319-62ebf02f-7469-4178-b066-ccac3cd334db.jpg)
The example showcases the variety and complexity of the objects in the CIFAR-100 dataset, highlighting the importance of a diverse dataset for training robust image classification models.
## Citations and Acknowledgments
If you use the CIFAR-100 dataset in your research or development work, please cite the following paper:
```bibtex
@TECHREPORT{Krizhevsky09learningmultiple,
author = {Alex Krizhevsky},
title = {Learning multiple layers of features from tiny images},
institution = {},
year = {2009}
}
```
We would like to acknowledge Alex Krizhevsky for creating and maintaining the CIFAR-100 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the CIFAR-100 dataset and its creator, visit the [CIFAR-100 dataset website](https://www.cs.toronto.edu/~kriz/cifar.html).

@@ -1,7 +1,78 @@
---
comments: true
description: Learn about the Fashion-MNIST dataset, a large database of Zalando's article images used for training various image processing systems and machine learning models.
---
# 🚧 Page Under Construction ⚒
# Fashion-MNIST Dataset
This page is currently under construction! 👷Please check back later for updates. 😃🔜
The [Fashion-MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset is a database of Zalando's article images—consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes. Fashion-MNIST is intended to serve as a direct drop-in replacement for the original MNIST dataset for benchmarking machine learning algorithms.
## Key Features
- Fashion-MNIST contains 60,000 training images and 10,000 testing images of Zalando's article images.
- The dataset comprises grayscale images of size 28x28 pixels.
- Each pixel has a single pixel-value associated with it, indicating the lightness or darkness of that pixel, with higher numbers meaning darker. This pixel-value is an integer between 0 and 255.
- Fashion-MNIST is widely used for training and testing in the field of machine learning, especially for image classification tasks.
## Dataset Structure
The Fashion-MNIST dataset is split into two subsets:
1. **Training Set**: This subset contains 60,000 images used for training machine learning models.
2. **Testing Set**: This subset consists of 10,000 images used for testing and benchmarking the trained models.
## Labels
Each training and test example is assigned to one of the following labels:
0. T-shirt/top
1. Trouser
2. Pullover
3. Dress
4. Coat
5. Sandal
6. Shirt
7. Sneaker
8. Bag
9. Ankle boot
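In code, these integer labels can be mapped back to their class names with a simple lookup; a minimal sketch:
```python
# Index-to-name mapping for the 10 Fashion-MNIST classes listed above
FASHION_MNIST_CLASSES = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]

label = 9
print(FASHION_MNIST_CLASSES[label])  # Ankle boot
```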
## Applications
The Fashion-MNIST dataset is widely used for training and evaluating deep learning models in image classification tasks, such as Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other machine learning algorithms. The dataset's simple and well-structured format makes it an essential resource for researchers and practitioners in the field of machine learning and computer vision.
## Usage
To train a CNN model on the Fashion-MNIST dataset for 100 epochs with an image size of 28x28, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='fashion-mnist', epochs=100, imgsz=28)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
yolo classify train data=fashion-mnist model=yolov8n-cls.pt epochs=100 imgsz=28
```
## Sample Images and Annotations
The Fashion-MNIST dataset contains grayscale images of Zalando's article images, providing a well-structured dataset for image classification tasks. Here are some examples of images from the dataset:
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239359139-ce0a434e-9056-43e0-a306-3214f193dcce.png)
The example showcases the variety and complexity of the images in the Fashion-MNIST dataset, highlighting the importance of a diverse dataset for training robust image classification models.
## Acknowledgments
If you use the Fashion-MNIST dataset in your research or development work, please acknowledge the dataset by linking to the [GitHub repository](https://github.com/zalandoresearch/fashion-mnist). This dataset was made available by Zalando Research.

@@ -1,7 +1,80 @@
---
comments: true
description: Learn about the ImageNet dataset, a large-scale database of annotated images commonly used for training deep learning models in computer vision tasks.
---
# 🚧 Page Under Construction ⚒
# ImageNet Dataset
This page is currently under construction! 👷Please check back later for updates. 😃🔜
[ImageNet](https://www.image-net.org/) is a large-scale database of annotated images designed for use in visual object recognition research. It contains over 14 million images, with each image annotated using WordNet synsets, making it one of the most extensive resources available for training deep learning models in computer vision tasks.
## Key Features
- ImageNet contains over 14 million high-resolution images spanning thousands of object categories.
- The dataset is organized according to the WordNet hierarchy, with each synset representing a category.
- ImageNet is widely used for training and benchmarking in the field of computer vision, particularly for image classification and object detection tasks.
- The annual ImageNet Large Scale Visual Recognition Challenge (ILSVRC) has been instrumental in advancing computer vision research.
## Dataset Structure
The ImageNet dataset is organized using the WordNet hierarchy. Each node in the hierarchy represents a category, and each category is described by a synset (a collection of synonymous terms). The images in ImageNet are annotated with one or more synsets, providing a rich resource for training models to recognize various objects and their relationships.
## ImageNet Large Scale Visual Recognition Challenge (ILSVRC)
The annual [ImageNet Large Scale Visual Recognition Challenge (ILSVRC)](http://image-net.org/challenges/LSVRC/) has been an important event in the field of computer vision. It has provided a platform for researchers and developers to evaluate their algorithms and models on a large-scale dataset with standardized evaluation metrics. The ILSVRC has led to significant advancements in the development of deep learning models for image classification, object detection, and other computer vision tasks.
## Applications
The ImageNet dataset is widely used for training and evaluating deep learning models in various computer vision tasks, such as image classification, object detection, and object localization. Some popular deep learning architectures, such as AlexNet, VGG, and ResNet, were developed and benchmarked using the ImageNet dataset.
## Usage
To train a deep learning model on the ImageNet dataset for 100 epochs with an image size of 224x224, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='imagenet', epochs=100, imgsz=224)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
yolo train data=imagenet model=yolov8n-cls.pt epochs=100 imgsz=224
```
## Sample Images and Annotations
The ImageNet dataset contains high-resolution images spanning thousands of object categories, providing a diverse and extensive dataset for training and evaluating computer vision models. Here are some examples of images from the dataset:
![Dataset sample images](https://user-images.githubusercontent.com/26833433/239280348-3d8f30c7-6f05-4dda-9cfe-d62ad9faecc9.png)
The example showcases the variety and complexity of the images in the ImageNet dataset, highlighting the importance of a diverse dataset for training robust computer vision models.
## Citations and Acknowledgments
If you use the ImageNet dataset in your research or development work, please cite the following paper:
```bibtex
@article{ILSVRC15,
Author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
Title = { {ImageNet Large Scale Visual Recognition Challenge}},
Year = {2015},
journal = {International Journal of Computer Vision (IJCV)},
volume={115},
number={3},
pages={211-252}
}
```
We would like to acknowledge the ImageNet team, led by Olga Russakovsky, Jia Deng, and Li Fei-Fei, for creating and maintaining the ImageNet dataset as a valuable resource for the machine learning and computer vision research community. For more information about the ImageNet dataset and its creators, visit the [ImageNet website](https://www.image-net.org/).

@@ -1,7 +1,77 @@
---
comments: true
description: Learn about the ImageNet10 dataset, a compact subset of the original ImageNet dataset designed for quick testing, CI tests, and sanity checks.
---
# 🚧 Page Under Construction ⚒
# ImageNet10 Dataset
This page is currently under construction! 👷Please check back later for updates. 😃🔜
The [ImageNet10](https://github.com/ultralytics/yolov5/releases/download/v1.0/imagenet10.zip) dataset is a small-scale subset of the [ImageNet](https://www.image-net.org/) database, developed by [Ultralytics](https://ultralytics.com) and designed for CI tests, sanity checks, and fast testing of training pipelines. This dataset is composed of the first image in the training set and the first image from the validation set of the first 10 classes in ImageNet. Although significantly smaller, it retains the structure and diversity of the original ImageNet dataset.
## Key Features
- ImageNet10 is a compact version of ImageNet, with 20 images representing the first 10 classes of the original dataset.
- The dataset is organized according to the WordNet hierarchy, mirroring the structure of the full ImageNet dataset.
- It is ideally suited for CI tests, sanity checks, and rapid testing of training pipelines in computer vision tasks.
- Although not designed for model benchmarking, it can provide a quick indication of a model's basic functionality and correctness.
## Dataset Structure
The ImageNet10 dataset, like the original ImageNet, is organized using the WordNet hierarchy. Each of the 10 classes in ImageNet10 is described by a synset (a collection of synonymous terms). The images in ImageNet10 are annotated with one or more synsets, providing a compact resource for testing models to recognize various objects and their relationships.
## Applications
The ImageNet10 dataset is useful for quickly testing and debugging computer vision models and pipelines. Its small size allows for rapid iteration, making it ideal for continuous integration tests and sanity checks. It can also be used for fast preliminary testing of new models or changes to existing models before moving on to full-scale testing with the complete ImageNet dataset.
## Usage
To test a deep learning model on the ImageNet10 dataset with an image size of 224x224, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Test Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='imagenet10', epochs=5, imgsz=224)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
yolo train data=imagenet10 model=yolov8n-cls.pt epochs=5 imgsz=224
```
## Sample Images and Annotations
The ImageNet10 dataset contains a subset of images from the original ImageNet dataset. These images are chosen to represent the first 10 classes in the dataset, providing a diverse yet compact dataset for quick testing and evaluation.
![Dataset sample images](https://user-images.githubusercontent.com/26833433/239689723-16f9b4a7-becc-4deb-b875-d3e5c28eb03b.png)
The example showcases the variety and complexity of the images in the ImageNet10 dataset, highlighting its usefulness for sanity checks and quick testing of computer vision models.
## Citations and Acknowledgments
If you use the ImageNet10 dataset in your research or development work, please cite the original ImageNet paper:
```bibtex
@article{ILSVRC15,
Author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
Title = { {ImageNet Large Scale Visual Recognition Challenge}},
Year = {2015},
journal = {International Journal of Computer Vision (IJCV)},
volume={115},
number={3},
pages={211-252}
}
```
We would like to acknowledge the ImageNet team, led by Olga Russakovsky, Jia Deng, and Li Fei-Fei, for creating and maintaining the ImageNet dataset. The ImageNet10 dataset, while a compact subset, is a valuable resource for quick testing and debugging in the machine learning and computer vision research community. For more information about the ImageNet dataset and its creators, visit the [ImageNet website](https://www.image-net.org/).

@@ -1,7 +1,112 @@
---
comments: true
description: Learn about the ImageNette dataset, a subset of 10 easily classified classes from the Imagenet dataset commonly used for training various image processing systems and machine learning models.
---
# 🚧 Page Under Construction ⚒
# ImageNette Dataset
This page is currently under construction! 👷Please check back later for updates. 😃🔜
The [ImageNette](https://github.com/fastai/imagenette) dataset is a subset of the larger [Imagenet](http://www.image-net.org/) dataset, but it only includes 10 easily distinguishable classes. It was created to provide a quicker, easier-to-use version of Imagenet for software development and education.
## Key Features
- ImageNette contains images from 10 different classes: tench, English springer, cassette player, chain saw, church, French horn, garbage truck, gas pump, golf ball, and parachute.
- The dataset comprises colored images of varying dimensions.
- ImageNette is widely used for training and testing in the field of machine learning, especially for image classification tasks.
## Dataset Structure
The ImageNette dataset is split into two subsets:
1. **Training Set**: This subset contains several thousand images used for training machine learning models. The exact number varies per class.
2. **Validation Set**: This subset consists of several hundred images used for validating and benchmarking the trained models. Again, the exact number varies per class.
## Applications
The ImageNette dataset is widely used for training and evaluating deep learning models in image classification tasks, such as Convolutional Neural Networks (CNNs), and various other machine learning algorithms. The dataset's straightforward format and well-chosen classes make it a handy resource for both beginner and experienced practitioners in the field of machine learning and computer vision.
## Usage
To train a model on the ImageNette dataset for 100 epochs with a standard image size of 224x224, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='imagenette', epochs=100, imgsz=224)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
yolo classify train data=imagenette model=yolov8n-cls.pt epochs=100 imgsz=224
```
## Sample Images and Annotations
The ImageNette dataset contains colored images of various objects and scenes, providing a diverse dataset for image classification tasks. Here are some examples of images from the dataset:
![Dataset sample image](https://docs.fast.ai/22_tutorial.imagenette_files/figure-html/cell-21-output-1.png)
The example showcases the variety and complexity of the images in the ImageNette dataset, highlighting the importance of a diverse dataset for training robust image classification models.
## ImageNette160 and ImageNette320
For faster prototyping and training, the ImageNette dataset is also available in two reduced sizes: ImageNette160 and ImageNette320. These datasets maintain the same classes and structure as the full ImageNette dataset, but the images are resized to a smaller dimension. As such, these versions of the dataset are particularly useful for preliminary model testing, or when computational resources are limited.
To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imagenette320' in the training command. The following code snippets illustrate this:
!!! example "Train Example with ImageNette160"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model with ImageNette160
model.train(data='imagenette160', epochs=100, imgsz=160)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model with ImageNette160
yolo classify train data=imagenette160 model=yolov8n-cls.pt epochs=100 imgsz=160
```
!!! example "Train Example with ImageNette320"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model with ImageNette320
model.train(data='imagenette320', epochs=100, imgsz=320)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model with ImageNette320
yolo classify train data=imagenette320 model=yolov8n-cls.pt epochs=100 imgsz=320
```
These smaller versions of the dataset allow for rapid iterations during the development process while still providing valuable and realistic image classification tasks.
## Citations and Acknowledgments
If you use the ImageNette dataset in your research or development work, please acknowledge it appropriately. For more information about the ImageNette dataset, visit the [ImageNette dataset GitHub page](https://github.com/fastai/imagenette).

@@ -1,7 +1,83 @@
---
comments: true
description: Learn about the ImageWoof dataset, a subset of the ImageNet consisting of 10 challenging-to-classify dog breed classes.
---
# 🚧 Page Under Construction ⚒
# ImageWoof Dataset
This page is currently under construction! 👷Please check back later for updates. 😃🔜
The [ImageWoof](https://github.com/fastai/imagenette) dataset is a subset of ImageNet consisting of 10 classes that are challenging to classify, since they're all dog breeds. It was created as a more difficult task for image classification algorithms to solve, aiming to encourage the development of more advanced models.
## Key Features
- ImageWoof contains images of 10 different dog breeds: Australian terrier, Border terrier, Samoyed, Beagle, Shih-Tzu, English foxhound, Rhodesian ridgeback, Dingo, Golden retriever, and Old English sheepdog.
- The dataset provides images at various resolutions (full size, 320px, 160px), accommodating for different computational capabilities and research needs.
- It also includes a version with noisy labels, providing a more realistic scenario where labels might not always be reliable.
## Dataset Structure
The ImageWoof dataset structure is based on the dog breed classes, with each breed having its own directory of images.
## Applications
The ImageWoof dataset is widely used for training and evaluating deep learning models in image classification tasks, especially when it comes to more complex and similar classes. The dataset's challenge lies in the subtle differences between the dog breeds, pushing the limits of a model's performance and generalization.
## Usage
To train a CNN model on the ImageWoof dataset for 100 epochs with an image size of 224x224, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='imagewoof', epochs=100, imgsz=224)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
yolo classify train data=imagewoof model=yolov8n-cls.pt epochs=100 imgsz=224
```
## Dataset Variants
The ImageWoof dataset comes in three different sizes to accommodate various research needs and computational capabilities:
1. **Full Size (imagewoof)**: This is the original version of the ImageWoof dataset. It contains full-sized images and is ideal for final training and performance benchmarking.
2. **Medium Size (imagewoof320)**: This version contains images resized to have a maximum edge length of 320 pixels. It's suitable for faster training without significantly sacrificing model performance.
3. **Small Size (imagewoof160)**: This version contains images resized to have a maximum edge length of 160 pixels. It's designed for rapid prototyping and experimentation where training speed is a priority.
To use these variants in your training, simply replace 'imagewoof' in the dataset argument with 'imagewoof320' or 'imagewoof160'. For example:
```python
from ultralytics import YOLO

model = YOLO('yolov8n-cls.pt')  # load a pretrained model

# For medium-sized dataset
model.train(data='imagewoof320', epochs=100, imgsz=224)

# For small-sized dataset
model.train(data='imagewoof160', epochs=100, imgsz=224)
```
It's important to note that using smaller images will likely yield lower performance in terms of classification accuracy. However, it's an excellent way to iterate quickly in the early stages of model development and prototyping.
## Sample Images and Annotations
The ImageWoof dataset contains colorful images of various dog breeds, providing a challenging dataset for image classification tasks. Here are some examples of images from the dataset:
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239357533-ec833254-4351-491b-8cb3-59578ea5d0b2.png)
The example showcases the subtle differences and similarities among the different dog breeds in the ImageWoof dataset, highlighting the complexity and difficulty of the classification task.
## Citations and Acknowledgments
If you use the ImageWoof dataset in your research or development work, please make sure to acknowledge the creators of the dataset by linking to the [official dataset repository](https://github.com/fastai/imagenette). Note that there is currently no official publication specifically about ImageWoof to cite.
We would like to acknowledge the FastAI team for creating and maintaining the ImageWoof dataset as a valuable resource for the machine learning and computer vision research community. For more information about the ImageWoof dataset, visit the [ImageWoof dataset repository](https://github.com/fastai/imagenette).

@@ -51,7 +51,7 @@ To train a CNN model on the MNIST dataset for 100 epochs with an image size of 3
```bash
# Start training from a pretrained *.pt model
cnn detect train data=MNIST.yaml model=cnn_mnist.pt epochs=100 imgsz=28
yolo classify train data=mnist model=yolov8n-cls.pt epochs=100 imgsz=28
```
## Sample Images and Annotations

@@ -1,7 +1,90 @@
---
comments: true
description: Learn about the COCO-Pose dataset, designed to encourage research on pose estimation tasks with standardized evaluation metrics.
---
# 🚧 Page Under Construction ⚒
# COCO-Pose Dataset
This page is currently under construction! 👷Please check back later for updates. 😃🔜
The [COCO-Pose](https://cocodataset.org/#keypoints-2017) dataset is a specialized version of the COCO (Common Objects in Context) dataset, designed for pose estimation tasks. It leverages the COCO Keypoints 2017 images and labels to enable the training of models like YOLO for pose estimation tasks.
![Pose sample image](https://user-images.githubusercontent.com/26833433/239691398-d62692dc-713e-4207-9908-2f6710050e5c.jpg)
## Key Features
- COCO-Pose builds upon the COCO Keypoints 2017 dataset which contains 200K images labeled with keypoints for pose estimation tasks.
- The dataset supports 17 keypoints for human figures, facilitating detailed pose estimation.
- Like COCO, it provides standardized evaluation metrics, including Object Keypoint Similarity (OKS) for pose estimation tasks, making it suitable for comparing model performance (a short OKS sketch follows this list).
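For reference, OKS scores each predicted keypoint by its distance from the ground truth, normalized by the object scale and a per-keypoint constant, then averages over visible keypoints. The sketch below is illustrative only and is not the official pycocotools implementation; the `sigmas` argument stands in for the standard COCO per-keypoint constants.
```python
import numpy as np

def object_keypoint_similarity(pred, gt, visibility, area, sigmas):
    """OKS for one object.

    pred, gt: (K, 2) arrays of predicted and ground-truth keypoint coordinates.
    visibility: (K,) ground-truth visibility flags (> 0 means labeled).
    area: ground-truth object area (scale term).
    sigmas: (K,) per-keypoint constants controlling falloff.
    """
    d2 = ((pred - gt) ** 2).sum(axis=1)        # squared distance per keypoint
    k2 = (2 * sigmas) ** 2                     # per-keypoint normalization
    e = d2 / (2 * area * k2 + np.spacing(1))   # normalized error
    visible = visibility > 0
    return float(np.exp(-e)[visible].mean()) if visible.any() else 0.0
```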
## Dataset Structure
The COCO-Pose dataset is split into three subsets:
1. **Train2017**: This subset contains a portion of the 118K images from the COCO dataset, annotated for training pose estimation models.
2. **Val2017**: This subset has a selection of images used for validation purposes during model training.
3. **Test2017**: This subset consists of images used for testing and benchmarking the trained models. Ground truth annotations for this subset are not publicly available, and the results are submitted to the [COCO evaluation server](https://competitions.codalab.org/competitions/5181) for performance evaluation.
## Applications
The COCO-Pose dataset is specifically used for training and evaluating deep learning models in keypoint detection and pose estimation tasks, such as OpenPose. The dataset's large number of annotated images and standardized evaluation metrics make it an essential resource for computer vision researchers and practitioners focused on pose estimation.
## Dataset YAML
A YAML (YAML Ain't Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the COCO-Pose dataset, the `coco-pose.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco-pose.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco-pose.yaml).
!!! example "ultralytics/datasets/coco-pose.yaml"
```yaml
--8<-- "ultralytics/datasets/coco-pose.yaml"
```
## Usage
To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco-pose.yaml', epochs=100, imgsz=640)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
yolo pose train data=coco-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
The COCO-Pose dataset contains a diverse set of images with human figures annotated with keypoints. Here are some examples of images from the dataset, along with their corresponding annotations:
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239690150-a9dc0bd0-7ad9-4b78-a30f-189ed727ea0e.jpg)
- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts.
The example showcases the variety and complexity of the images in the COCO-Pose dataset and the benefits of using mosaicing during the training process.
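As a rough illustration of the mosaicing idea (this is not the Ultralytics implementation, which also remaps labels and applies random scaling and cropping), four equally sized images can be tiled into a single 2x2 mosaic like this:
```python
import numpy as np

def simple_mosaic(imgs):
    """Tile four HxWx3 images of identical shape into one 2Hx2W mosaic."""
    assert len(imgs) == 4 and all(im.shape == imgs[0].shape for im in imgs)
    top = np.concatenate(imgs[:2], axis=1)     # image 0 | image 1
    bottom = np.concatenate(imgs[2:], axis=1)  # image 2 | image 3
    return np.concatenate([top, bottom], axis=0)

# Example with random arrays standing in for dataset images
mosaic = simple_mosaic([np.random.randint(0, 255, (320, 320, 3), dtype=np.uint8) for _ in range(4)])
print(mosaic.shape)  # (640, 640, 3)
```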
## Citations and Acknowledgments
If you use the COCO-Pose dataset in your research or development work, please cite the following paper:
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO-Pose dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).

@@ -28,7 +28,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
To train a YOLOv8n model on the COCO8-Pose dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@@ -38,7 +38,7 @@ To train a YOLOv8n model on the COCO8-Pose dataset for 100 epochs with an image
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco8-pose.yaml', epochs=100, imgsz=640)

@@ -1,7 +1,89 @@
---
comments: true
description: Learn about the COCO-Seg dataset, designed for simple training of YOLO models on instance segmentation tasks.
---
# 🚧 Page Under Construction ⚒
# COCO-Seg Dataset
This page is currently under construction! 👷Please check back later for updates. 😃🔜
The [COCO-Seg](https://cocodataset.org/#home) dataset, an extension of the COCO (Common Objects in Context) dataset, is specially designed to aid research in object instance segmentation. It uses the same images as COCO but introduces more detailed segmentation annotations. This dataset is a crucial resource for researchers and developers working on instance segmentation tasks, especially for training YOLO models.
## Key Features
- COCO-Seg retains the original 330K images from COCO.
- The dataset consists of the same 80 object categories found in the original COCO dataset.
- Annotations now include more detailed instance segmentation masks for each object in the images.
- COCO-Seg provides standardized evaluation metrics like mean Average Precision (mAP) for object detection and mean Average Recall (mAR) for instance segmentation tasks, enabling effective comparison of model performance (a mask IoU sketch follows this list).
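Mask-level mAP and mAR are built on the intersection-over-union (IoU) between predicted and ground-truth instance masks. The snippet below is a minimal NumPy sketch of mask IoU for intuition only; it is not the official COCO evaluation code.
```python
import numpy as np

def mask_iou(mask1: np.ndarray, mask2: np.ndarray) -> float:
    """IoU between two binary instance masks of the same shape."""
    intersection = np.logical_and(mask1, mask2).sum()
    union = np.logical_or(mask1, mask2).sum()
    return float(intersection / union) if union else 0.0
```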
## Dataset Structure
The COCO-Seg dataset is partitioned into three subsets:
1. **Train2017**: This subset contains 118K images for training instance segmentation models.
2. **Val2017**: This subset includes 5K images used for validation purposes during model training.
3. **Test2017**: This subset encompasses 20K images used for testing and benchmarking the trained models. Ground truth annotations for this subset are not publicly available, and the results are submitted to the [COCO evaluation server](https://competitions.codalab.org/competitions/5181) for performance evaluation.
## Applications
COCO-Seg is widely used for training and evaluating deep learning models in instance segmentation, such as the YOLO models. The large number of annotated images, the diversity of object categories, and the standardized evaluation metrics make it an indispensable resource for computer vision researchers and practitioners.
## Dataset YAML
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the COCO-Seg dataset, the `coco.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/datasets/coco.yaml).
!!! example "ultralytics/datasets/coco.yaml"
```yaml
--8<-- "ultralytics/datasets/coco.yaml"
```
## Usage
To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco-seg.yaml', epochs=100, imgsz=640)
```
=== "CLI"
```bash
# Start training from a pretrained *.pt model
yolo segment train data=coco-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
```
## Sample Images and Annotations
COCO-Seg, like its predecessor COCO, contains a diverse set of images with various object categories and complex scenes. However, COCO-Seg introduces more detailed instance segmentation masks for each object in the images. Here are some examples of images from the dataset, along with their corresponding instance segmentation masks:
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239690696-93fa8765-47a2-4b34-a6e5-516d0d1c725b.jpg)
- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This aids the model's ability to generalize to different object sizes, aspect ratios, and contexts.
The example showcases the variety and complexity of the images in the COCO-Seg dataset and the benefits of using mosaicing during the training process.
## Citations and Acknowledgments
If you use the COCO-Seg dataset in your research or development work, please cite the original COCO paper and acknowledge the extension to COCO-Seg:
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
We extend our thanks to the COCO Consortium for creating and maintaining this invaluable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).

@@ -28,7 +28,7 @@ A YAML (Yet Another Markup Language) file is used to define the dataset configur
## Usage
To train a YOLOv8n model on the COCO8-Seg dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
To train a YOLOv8n-seg model on the COCO8-Seg dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
!!! example "Train Example"
@ -38,7 +38,7 @@ To train a YOLOv8n model on the COCO8-Seg dataset for 100 epochs with an image s
from ultralytics import YOLO
# Load a model
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco8-seg.yaml', epochs=100, imgsz=640)

@ -0,0 +1,34 @@
---
comments: true
description: Understand all the Continuous Integration (CI) tests for Ultralytics repositories and see their statuses in a clear, concise table.
---
# Continuous Integration (CI)
Continuous Integration (CI) is an essential aspect of software development in which changes are integrated and tested automatically. CI allows us to maintain high-quality code by catching issues early and often in the development process. At Ultralytics, we use a variety of CI tests to ensure the quality and integrity of our codebase.
Here's a brief description of our CI tests:
- **CI:** This is our primary CI test that involves running unit tests, linting checks, and sometimes more comprehensive tests depending on the repository.
- **Docker Deployment:** This test checks the deployment of the project using Docker to ensure the Dockerfile and related scripts are working correctly.
- **Broken Links:** This test scans the codebase for any broken or dead links in our markdown or HTML files.
- **CodeQL:** CodeQL is a tool from GitHub that performs semantic analysis on our code, helping to find potential security vulnerabilities and maintain high-quality code.
- **PyPI Publishing:** This test checks whether the project can be packaged and published to PyPI without any errors.
Below is the table showing the status of these CI tests for our main repositories:
| Repository | CI | Docker Deployment | Broken Links | CodeQL | PyPI and Docs Publishing |
|-----------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [yolov3](https://github.com/ultralytics/yolov3) | [![YOLOv3 CI](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/ci-testing.yml) | [![Publish Docker Images](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/docker.yml) | [![Check Broken links](https://github.com/ultralytics/yolov3/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/yolov3/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/ultralytics/yolov3/actions/workflows/codeql-analysis.yml) | |
| [yolov5](https://github.com/ultralytics/yolov5) | [![YOLOv5 CI](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml) | [![Publish Docker Images](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/docker.yml) | [![Check Broken links](https://github.com/ultralytics/yolov5/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/yolov5/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/ultralytics/yolov5/actions/workflows/codeql-analysis.yml) | |
| [ultralytics](https://github.com/ultralytics/ultralytics) | [![ultralytics CI](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/ci.yaml) | [![Publish Docker Images](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/docker.yaml) | [![Check Broken links](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/links.yml) | [![CodeQL](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/codeql.yaml) | [![Publish to PyPI and Deploy Docs](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml/badge.svg)](https://github.com/ultralytics/ultralytics/actions/workflows/publish.yml) |
| [hub](https://github.com/ultralytics/hub) | [![HUB CI](https://github.com/ultralytics/hub/actions/workflows/ci.yaml/badge.svg)](https://github.com/ultralytics/hub/actions/workflows/ci.yaml) | | [![Check Broken links](https://github.com/ultralytics/hub/actions/workflows/links.yml/badge.svg)](https://github.com/ultralytics/hub/actions/workflows/links.yml) | | |
| [docs](https://github.com/ultralytics/docs) | | | | | [![pages-build-deployment](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/ultralytics/docs/actions/workflows/pages/pages-build-deployment) |
Each badge shows the status of the last run of the corresponding CI test on the `main` branch of the respective repository. If a test fails, the badge will display a "failing" status, and if it passes, it will display a "passing" status.
If you notice a test failing, it would be a great help if you could report it through a GitHub issue in the respective repository.
Remember, a successful CI test does not mean that everything is perfect. It is always recommended to manually review the code before deployment or merging changes.
Happy coding!

@ -7,6 +7,7 @@ Welcome to the Ultralytics Help page! We are committed to providing you with com
- [Frequently Asked Questions (FAQ)](FAQ.md): Find answers to common questions and issues faced by users and contributors of Ultralytics YOLO repositories.
- [Contributing Guide](contributing.md): Learn the best practices for submitting pull requests, reporting bugs, and contributing to the development of our repositories.
- [Continuous Integration (CI) Guide](CI.md): Understand the CI tests we perform for each Ultralytics repository and see their current statuses.
- [Contributor License Agreement (CLA)](CLA.md): Familiarize yourself with our CLA to understand the terms and conditions for contributing to Ultralytics projects.
- [Minimum Reproducible Example (MRE) Guide](minimum_reproducible_example.md): Understand how to create an MRE when submitting bug reports to ensure that our team can quickly and efficiently address the issue.
- [Code of Conduct](code_of_conduct.md): Learn about our community guidelines and expectations to ensure a welcoming and inclusive environment for all participants.

@ -1,11 +1,8 @@
---
comments: true
description: Explore Ultralytics YOLOv8 Inference API for efficient object detection. Check out our Python and CLI examples to streamline your image analysis.
---
# 🚧 Page Under Construction ⚒
This page is currently under construction! 👷Please check back later for updates. 😃🔜
# YOLO Inference API
The YOLO Inference API allows you to access the YOLOv8 object detection capabilities via a RESTful API. This enables you to run object detection on images without the need to install and set up the YOLOv8 environment locally.
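As an illustration of the idea, a request to such an API could look like the sketch below; the endpoint URL, `MODEL_ID`, `API_KEY`, and request parameters are placeholders for this example rather than values documented on this page.
```python
import requests

# Placeholder values; replace with your own model ID and API key from Ultralytics HUB
url = 'https://api.ultralytics.com/v1/predict/MODEL_ID'
headers = {'x-api-key': 'API_KEY'}
data = {'size': 640, 'confidence': 0.25, 'iou': 0.45}

# Send an image for remote inference and print the JSON detections
with open('path/to/image.jpg', 'rb') as f:
    response = requests.post(url, headers=headers, data=data, files={'image': f})
print(response.json())
```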

@ -1,7 +1,77 @@
---
comments: true
description: YOLOv3, YOLOv3-Ultralytics and YOLOv3u by Ultralytics explained. Learn the evolution of these models and their specifications.
---
# 🚧Page Under Construction ⚒
# YOLOv3, YOLOv3-Ultralytics, and YOLOv3u
This page is currently under construction!👷Please check back later for updates. 😃🔜
## Overview
This document presents an overview of three closely related object detection models, namely [YOLOv3](https://pjreddie.com/darknet/yolo/), [YOLOv3-Ultralytics](https://github.com/ultralytics/yolov3), and [YOLOv3u](https://github.com/ultralytics/ultralytics).
1. **YOLOv3:** This is the third version of the You Only Look Once (YOLO) object detection algorithm. Originally developed by Joseph Redmon, YOLOv3 improved on its predecessors by introducing features such as multiscale predictions and three different sizes of detection kernels.
2. **YOLOv3-Ultralytics:** This is Ultralytics' implementation of the YOLOv3 model. It reproduces the original YOLOv3 architecture and offers additional functionalities, such as support for more pre-trained models and easier customization options.
3. **YOLOv3u:** This is an updated version of YOLOv3-Ultralytics that incorporates the anchor-free, objectness-free split head used in YOLOv8 models. YOLOv3u maintains the same backbone and neck architecture as YOLOv3 but with the updated detection head from YOLOv8.
## Key Features
- **YOLOv3:** Introduced the use of three different scales for detection, leveraging three different sizes of detection kernels: 13x13, 26x26, and 52x52. This significantly improved detection accuracy for objects of different sizes. Additionally, YOLOv3 added features such as multi-label predictions for each bounding box and a better feature extractor network.
- **YOLOv3-Ultralytics:** Ultralytics' implementation of YOLOv3 provides the same performance as the original model but comes with added support for more pre-trained models, additional training methods, and easier customization options. This makes it more versatile and user-friendly for practical applications.
- **YOLOv3u:** This updated model incorporates the anchor-free, objectness-free split head from YOLOv8. By eliminating the need for pre-defined anchor boxes and objectness scores, this detection head design can improve the model's ability to detect objects of varying sizes and shapes. This makes YOLOv3u more robust and accurate for object detection tasks.
## Supported Tasks
YOLOv3, YOLOv3-Ultralytics, and YOLOv3u all support the following tasks:
- Object Detection
## Supported Modes
All three models support the following modes:
- Inference
- Validation
- Training
- Export
## Performance
Below is a comparison of the performance of the three models, measured as mean Average Precision (mAP) on the COCO dataset:
TODO
## Usage
You can use these models for object detection tasks through the Ultralytics Python package. The following sample code snippet shows how to run inference with a YOLOv3u model:
```python
from ultralytics import YOLO
# Load the model
model = YOLO('yolov3.pt') # load a pretrained model
# Perform inference
results = model('image.jpg')
# Process the results
for r in results:
    print(r.boxes)  # print the detected bounding boxes
```
## Citations and Acknowledgments
If you use YOLOv3 in your research, please cite the original YOLO papers and the Ultralytics YOLOv3 repository:
```bibtex
@article{redmon2018yolov3,
title={YOLOv3: An Incremental Improvement},
author={Redmon, Joseph and Farhadi, Ali},
journal={arXiv preprint arXiv:1804.02767},
year={2018}
}
```
Thank you to Joseph Redmon and Ali Farhadi for developing the original YOLOv3.

@ -1,19 +1,21 @@
---
comments: true
description: Detect objects faster and more accurately using Ultralytics YOLOv5u. Find pre-trained models for each task, including Inference, Validation and Training.
description: YOLOv5u by Ultralytics explained. Discover the evolution of this model and its key specifications. Experience faster and more accurate object detection.
---
# YOLOv5u
## Overview
YOLOv5u is an updated version of YOLOv5 that incorporates the anchor-free split Ultralytics head used in the YOLOv8 models. It retains the same backbone and neck architecture as YOLOv5 but offers improved accuracy-speed tradeoff for object detection tasks.
YOLOv5u is an enhanced version of the [YOLOv5](https://github.com/ultralytics/yolov5) object detection model from Ultralytics. This iteration incorporates the anchor-free, objectness-free split head that is featured in the [YOLOv8](./yolov8.md) models. Although it maintains the same backbone and neck architecture as YOLOv5, YOLOv5u provides an improved accuracy-speed tradeoff for object detection tasks, making it a robust choice for numerous applications.
## Key Features
- **Anchor-free Split Ultralytics Head:** YOLOv5u replaces the traditional anchor-based detection head with an anchor-free split Ultralytics head, resulting in improved performance.
- **Optimized Accuracy-Speed Tradeoff:** The updated model offers a better balance between accuracy and speed, making it more suitable for a wider range of applications.
- **Variety of Pre-trained Models:** YOLOv5u offers a range of pre-trained models tailored for various tasks, including Inference, Validation, and Training.
- **Anchor-free Split Ultralytics Head:** YOLOv5u replaces the conventional anchor-based detection head with an anchor-free split Ultralytics head, boosting performance in object detection tasks.
- **Optimized Accuracy-Speed Tradeoff:** By delivering a better balance between accuracy and speed, YOLOv5u is suitable for a diverse range of real-time applications, from autonomous driving to video surveillance.
- **Variety of Pre-trained Models:** YOLOv5u includes numerous pre-trained models for tasks like Inference, Validation, and Training, providing the flexibility to tackle various object detection challenges.
## Supported Tasks
@ -45,4 +47,40 @@ YOLOv5u is an updated version of YOLOv5 that incorporates the anchor-free split
| [YOLOv5s6u](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5s6u.pt) | 1280 | 48.6 | - | - | 15.3 | 24.6 |
| [YOLOv5m6u](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5m6u.pt) | 1280 | 53.6 | - | - | 41.2 | 65.7 |
| [YOLOv5l6u](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5l6u.pt) | 1280 | 55.7 | - | - | 86.1 | 137.4 |
| [YOLOv5x6u](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | 1280 | 56.8 | - | - | 155.4 | 250.7 |
| [YOLOv5x6u](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov5x6u.pt) | 1280 | 56.8 | - | - | 155.4 | 250.7 |
## Usage
You can use YOLOv5u for object detection tasks through the Ultralytics Python package. The following sample code snippet shows how to run inference with a YOLOv5u model:
```python
from ultralytics import YOLO
# Load the model
model = YOLO('yolov5n.pt') # load a pretrained model
# Perform inference
results = model('image.jpg')
# Process the results
for r in results:
    print(r.boxes)  # print the detected bounding boxes
```
## Citations and Acknowledgments
If you use YOLOv5 or YOLOv5u in your research, please cite the Ultralytics YOLOv5 repository as follows:
```bibtex
@software{yolov5,
title = {YOLOv5 by Ultralytics},
author = {Glenn Jocher},
year = {2020},
version = {7.0},
license = {AGPL-3.0},
url = {https://github.com/ultralytics/yolov5},
doi = {10.5281/zenodo.3908559},
orcid = {0000-0001-5950-6979}
}
```
Special thanks to Glenn Jocher and the Ultralytics team for their work on developing and maintaining the YOLOv5 and YOLOv5u models.

@ -74,4 +74,41 @@ YOLOv8 is the latest iteration in the YOLO series of real-time object detectors,
| [YOLOv8m-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-pose.pt) | 640 | 65.0 | 88.8 | 456.3 | 2.00 | 26.4 | 81.0 |
| [YOLOv8l-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-pose.pt) | 640 | 67.6 | 90.0 | 784.5 | 2.59 | 44.4 | 168.6 |
| [YOLOv8x-pose](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose.pt) | 640 | 69.2 | 90.2 | 1607.1 | 3.73 | 69.4 | 263.2 |
| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 |
| [YOLOv8x-pose-p6](https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-pose-p6.pt) | 1280 | 71.6 | 91.2 | 4088.7 | 10.04 | 99.1 | 1066.4 |
## Usage
You can use YOLOv8 for object detection tasks using the Ultralytics pip package. The following is a sample code snippet showing how to use YOLOv8 models for inference:
```python
from ultralytics import YOLO
# Load the model
model = YOLO('yolov8n.pt') # load a pretrained model
# Perform inference
results = model('image.jpg')
# Process the results
for r in results:
    print(r.boxes)  # print the detected bounding boxes
```
## Citation
If you use the YOLOv8 model or any other software from this repository in your work, please cite it using the following format:
```bibtex
@software{yolov8_ultralytics,
author = {Glenn Jocher and Ayush Chaurasia and Jing Qiu},
title = {YOLO by Ultralytics},
version = {8.0.0},
year = {2023},
url = {https://github.com/ultralytics/ultralytics},
orcid = {0000-0001-5950-6979, 0000-0002-7603-6750, 0000-0003-3783-7069},
license = {AGPL-3.0}
}
```
Please note that the DOI is pending and will be added to the citation once it is available. Use of the software is governed by the AGPL-3.0 license.

@ -76,21 +76,7 @@ see the [Configuration](../usage/cfg.md) page.
### Dataset format
The YOLO classification dataset format is the same as the torchvision format. Each class of images has its own folder, and you simply pass the path of the dataset root folder, i.e. `yolo classify train data="path/to/dataset"`
```
dataset/
├── train/
├──── class1/
├──── class2/
├──── class3/
├──── ...
├── val/
├──── class1/
├──── class2/
├──── class3/
├──── ...
```
YOLO classification dataset format can be found in detail in the [Dataset Guide](../datasets/classify/index.md).
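For example, with a dataset laid out in this folder structure, training a classification model in Python might look like the following sketch (the dataset path is a placeholder):
```python
from ultralytics import YOLO

# Load a pretrained classification model
model = YOLO('yolov8n-cls.pt')

# Train on a folder-structured dataset (torchvision ImageFolder layout)
model.train(data='path/to/dataset', epochs=100, imgsz=224)
```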
## Val
@ -190,4 +176,4 @@ i.e. `yolo predict model=yolov8n-cls.onnx`. Usage examples are shown for your mo
| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-cls_web_model/` | ✅ | `imgsz` |
| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-cls_paddle_model/` | ✅ | `imgsz` |
See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.
See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.

@ -67,7 +67,7 @@ Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. For a ful
### Dataset format
YOLO detection dataset format can be found in detail in the [Dataset Guide](../yolov5/tutorials/train_custom_data.md). To convert your existing dataset from other formats( like COCO, VOC etc.) to YOLO format, please use [json2yolo tool](https://github.com/ultralytics/JSON2YOLO) by Ultralytics.
YOLO detection dataset format can be found in detail in the [Dataset Guide](../datasets/detect/index.md). To convert your existing dataset from other formats (like COCO, etc.) to YOLO format, please use the [JSON2YOLO tool](https://github.com/ultralytics/JSON2YOLO) by Ultralytics.
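For reference, the conversion itself is simple arithmetic; the sketch below illustrates the mapping rather than the JSON2YOLO tool's code: a COCO `[x_min, y_min, width, height]` box in pixels becomes a normalized `[x_center, y_center, width, height]` YOLO label.
```python
def coco_box_to_yolo(box, img_w, img_h):
    """Convert a COCO [x_min, y_min, w, h] pixel box to normalized YOLO xywh."""
    x, y, w, h = box
    return (x + w / 2) / img_w, (y + h / 2) / img_h, w / img_w, h / img_h

# Example: class 0, a 100x50 box at (20, 40) in a 640x480 image
cls = 0
xc, yc, w, h = coco_box_to_yolo([20, 40, 100, 50], 640, 480)
print(f'{cls} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}')  # one such line per object in the label .txt
```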
## Val
@ -167,4 +167,4 @@ Available YOLOv8 export formats are in the table below. You can predict or valid
| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n_web_model/` | ✅ | `imgsz` |
| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n_paddle_model/` | ✅ | `imgsz` |
See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.
See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.

@ -8,7 +8,7 @@ to as keypoints. The keypoints can represent various parts of the object such as
features. The locations of the keypoints are usually represented as a set of 2D `[x, y]` or 3D `[x, y, visible]`
coordinates.
<img width="1024" src="https://user-images.githubusercontent.com/26833433/212094133-6bb8c21c-3d47-41df-a512-81c5931054ae.png">
<img width="1024" src="https://user-images.githubusercontent.com/26833433/239691398-d62692dc-713e-4207-9908-2f6710050e5c.jpg">
The output of a pose estimation model is a set of points that represent the keypoints on an object in the image, usually
along with the confidence scores for each point. Pose estimation is a good choice when you need to identify specific
@ -76,6 +76,10 @@ Train a YOLOv8-pose model on the COCO128-pose dataset.
yolo pose train data=coco8-pose.yaml model=yolov8n-pose.yaml pretrained=yolov8n-pose.pt epochs=100 imgsz=640
```
### Dataset format
YOLO pose dataset format can be found in detail in the [Dataset Guide](../datasets/pose/index.md). To convert your existing dataset from other formats (like COCO, etc.) to YOLO format, please use the [JSON2YOLO tool](https://github.com/ultralytics/JSON2YOLO) by Ultralytics.
## Val
Validate trained YOLOv8n-pose model accuracy on the COCO128-pose dataset. No arguments need to be passed, as the `model`
@ -177,4 +181,4 @@ i.e. `yolo predict model=yolov8n-pose.onnx`. Usage examples are shown for your m
| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-pose_web_model/` | ✅ | `imgsz` |
| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-pose_paddle_model/` | ✅ | `imgsz` |
See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.
See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.

@ -75,11 +75,7 @@ arguments see the [Configuration](../usage/cfg.md) page.
### Dataset format
YOLO segmentation dataset label format extends detection format with segment points.
`cls x1 y1 x2 y2 p1 p2 ... pn`
To convert your existing dataset from other formats( like COCO, VOC etc.) to YOLO format, please use [json2yolo tool](https://github.com/ultralytics/JSON2YOLO) by Ultralytics.
YOLO segmentation dataset format can be found in detail in the [Dataset Guide](../datasets/segment/index.md). To convert your existing dataset from other formats (like COCO, etc.) to YOLO format, please use the [JSON2YOLO tool](https://github.com/ultralytics/JSON2YOLO) by Ultralytics.
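As a small illustration of that label format (not the JSON2YOLO code itself), each instance is written as a class index followed by the polygon's points normalized by image width and height:
```python
def polygon_to_yolo_seg(cls, polygon, img_w, img_h):
    """Format one instance as 'cls x1 y1 x2 y2 ... xn yn' with normalized coords."""
    coords = [f'{v:.6f}' for x, y in polygon for v in (x / img_w, y / img_h)]
    return ' '.join([str(cls)] + coords)

# Example: class 0 with a rectangular polygon in a 640x480 image
print(polygon_to_yolo_seg(0, [(120, 80), (200, 80), (200, 160), (120, 160)], 640, 480))
```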
## Val
@ -185,4 +181,4 @@ i.e. `yolo predict model=yolov8n-seg.onnx`. Usage examples are shown for your mo
| [TF.js](https://www.tensorflow.org/js) | `tfjs` | `yolov8n-seg_web_model/` | ✅ | `imgsz` |
| [PaddlePaddle](https://github.com/PaddlePaddle) | `paddle` | `yolov8n-seg_paddle_model/` | ✅ | `imgsz` |
See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.
See full `export` details in the [Export](https://docs.ultralytics.com/modes/export/) page.

@ -29,7 +29,7 @@ Creating a custom model to detect your objects is an iterative process of collec
YOLOv5 models must be trained on labelled data in order to learn classes of objects in that data. There are two options for creating your dataset before you start training:
<details open markdown>
<details markdown>
<summary>Use <a href="https://roboflow.com/?ref=ultralytics">Roboflow</a> to create your dataset in YOLO format</summary>
### 1.1 Collect Images

@ -335,6 +335,7 @@ nav:
- Help: help/index.md
- Frequently Asked Questions (FAQ): help/FAQ.md
- Contributing Guide: help/contributing.md
- Continuous Integration (CI) Guide: help/CI.md
- Contributor License Agreement (CLA): help/CLA.md
- Minimum Reproducible Example (MRE) Guide: help/minimum_reproducible_example.md
- Code of Conduct: help/code_of_conduct.md

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
__version__ = '8.0.105'
__version__ = '8.0.106'
from ultralytics.hub import start
from ultralytics.vit.rtdetr import RTDETR

@ -127,7 +127,7 @@ class Pose(Detect):
y = kpts.view(bs, *self.kpt_shape, -1)
a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
if ndim == 3:
a = torch.cat((a, y[:, :, 1:2].sigmoid()), 2)
a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
return a.view(bs, self.nk, -1)
else:
y = kpts.clone()

@ -17,9 +17,11 @@ class RTDETRPredictor(BasePredictor):
results = []
for i, bbox in enumerate(bboxes): # (300, 4)
bbox = ops.xywh2xyxy(bbox)
score, cls = scores[i].max(-1) # (300, )
idx = score > self.args.conf
pred = torch.cat([bbox, score[..., None], cls[..., None]], dim=-1)[idx] # filter
score, cls = scores[i].max(-1, keepdim=True) # (300, 1)
idx = score.squeeze(-1) > self.args.conf # (300, )
if self.args.classes is not None:
idx = (cls == torch.tensor(self.args.classes, device=cls.device)).any(1) & idx
pred = torch.cat([bbox, score, cls], dim=-1)[idx] # filter
orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
oh, ow = orig_img.shape[:2]
if not isinstance(orig_imgs, torch.Tensor):

@ -206,8 +206,6 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
Args:
root (str): Dataset path.
transform (callable, optional): torchvision transforms, used by default.
album_transform (callable, optional): Albumentations transforms, used if installed.
Attributes:
cache_ram (bool): True if images should be cached in RAM, False otherwise.

@ -414,12 +414,18 @@ class BaseTrainer:
'date': datetime.now().isoformat(),
'version': __version__}
# Use dill (if exists) to serialize the lambda functions where pickle does not do this
try:
import dill as pickle
except ImportError:
import pickle
# Save last, best and delete
torch.save(ckpt, self.last)
torch.save(ckpt, self.last, pickle_module=pickle)
if self.best_fitness == self.fitness:
torch.save(ckpt, self.best)
torch.save(ckpt, self.best, pickle_module=pickle)
if (self.epoch > 0) and (self.save_period > 0) and (self.epoch % self.save_period == 0):
torch.save(ckpt, self.wdir / f'epoch{self.epoch}.pt')
torch.save(ckpt, self.wdir / f'epoch{self.epoch}.pt', pickle_module=pickle)
del ckpt
@staticmethod

@ -754,25 +754,9 @@ ENVIRONMENT = 'Colab' if is_colab() else 'Kaggle' if is_kaggle() else 'Jupyter'
TESTS_RUNNING = is_pytest_running() or is_github_actions_ci()
set_sentry()
# OpenCV Multilanguage-friendly functions ------------------------------------------------------------------------------
imshow_ = cv2.imshow # copy to avoid recursion errors
def imread(filename, flags=cv2.IMREAD_COLOR):
return cv2.imdecode(np.fromfile(filename, np.uint8), flags)
def imwrite(filename, img):
try:
cv2.imencode(Path(filename).suffix, img)[1].tofile(filename)
return True
except Exception:
return False
def imshow(path, im):
imshow_(path.encode('unicode_escape').decode(), im)
# Apply monkey patches if the script is being run from within the parent directory of the script's location
from .patches import imread, imshow, imwrite
# torch.save = torch_save
if Path(inspect.stack()[0].filename).parent.parent.as_posix() in inspect.stack()[-1].filename:
cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow # redefine
cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow
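A brief note on why these patched functions exist: `cv2.imread`/`cv2.imwrite` cannot handle non-ASCII paths on some platforms, while the `np.fromfile`/`imencode(...).tofile()` versions can. A minimal sketch, assuming the new `ultralytics.yolo.utils.patches` module added in this commit:
```python
import numpy as np
from ultralytics.yolo.utils.patches import imread, imwrite

# Round-trip an image through a filename containing non-ASCII characters
img = np.zeros((64, 64, 3), dtype=np.uint8)
assert imwrite('例子_example.jpg', img)        # imencode(...).tofile() handles the Unicode path
assert imread('例子_example.jpg') is not None  # np.fromfile + imdecode reads it back
```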

@ -1,3 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import contextlib
import math
import re

@ -0,0 +1,45 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Monkey patches to update/extend functionality of existing functions
"""
from pathlib import Path
import cv2
import numpy as np
import torch
# OpenCV Multilanguage-friendly functions ------------------------------------------------------------------------------
_imshow = cv2.imshow # copy to avoid recursion errors
def imread(filename, flags=cv2.IMREAD_COLOR):
return cv2.imdecode(np.fromfile(filename, np.uint8), flags)
def imwrite(filename, img):
try:
cv2.imencode(Path(filename).suffix, img)[1].tofile(filename)
return True
except Exception:
return False
def imshow(path, im):
_imshow(path.encode('unicode_escape').decode(), im)
# PyTorch functions ----------------------------------------------------------------------------------------------------
_torch_save = torch.save # copy to avoid recursion errors
def torch_save(*args, **kwargs):
# Use dill (if exists) to serialize the lambda functions where pickle does not do this
try:
import dill as pickle
except ImportError:
import pickle
if 'pickle_module' not in kwargs:
kwargs['pickle_module'] = pickle
return _torch_save(*args, **kwargs)
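To illustrate why the `dill` fallback is useful: the standard `pickle` module cannot serialize lambdas that may live inside a checkpoint (for example a scheduler's `lr_lambda`), whereas `dill` can. A quick sketch, assuming `dill` is installed:
```python
import pickle

import dill

ckpt = {'lr_lambda': lambda x: 0.9 ** x}  # stand-in for a lambda stored in a checkpoint

try:
    pickle.dumps(ckpt)
except Exception as e:  # pickle.PicklingError or AttributeError depending on context
    print(f'pickle failed: {e}')

print(f'dill produced {len(dill.dumps(ckpt))} bytes')  # dill serializes the lambda
```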

@ -341,6 +341,12 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
for f in Path('/Users/glennjocher/Downloads/weights').rglob('*.pt'):
strip_optimizer(f)
"""
# Use dill (if exists) to serialize the lambda functions where pickle does not do this
try:
import dill as pickle
except ImportError:
import pickle
x = torch.load(f, map_location=torch.device('cpu'))
args = {**DEFAULT_CFG_DICT, **x['train_args']} # combine model args with default args, preferring model args
if x.get('ema'):
@ -353,7 +359,7 @@ def strip_optimizer(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
p.requires_grad = False
x['train_args'] = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS} # strip non-default keys
# x['model'].args = x['train_args']
torch.save(x, s or f)
torch.save(x, s or f, pickle_module=pickle)
mb = os.path.getsize(s or f) / 1E6 # filesize
LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB")

@ -1,3 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from ultralytics.yolo.utils import LOGGER
try:

@ -62,14 +62,13 @@ class DetectionValidator(BaseValidator):
def postprocess(self, preds):
"""Apply Non-maximum suppression to prediction outputs."""
preds = ops.non_max_suppression(preds,
self.args.conf,
self.args.iou,
labels=self.lb,
multi_label=True,
agnostic=self.args.single_cls,
max_det=self.args.max_det)
return preds
return ops.non_max_suppression(preds,
self.args.conf,
self.args.iou,
labels=self.lb,
multi_label=True,
agnostic=self.args.single_cls,
max_det=self.args.max_det)
def update_metrics(self, preds, batch):
"""Metrics."""

@ -33,15 +33,14 @@ class PoseValidator(DetectionValidator):
def postprocess(self, preds):
"""Apply non-maximum suppression and return detections with high confidence scores."""
preds = ops.non_max_suppression(preds,
self.args.conf,
self.args.iou,
labels=self.lb,
multi_label=True,
agnostic=self.args.single_cls,
max_det=self.args.max_det,
nc=self.nc)
return preds
return ops.non_max_suppression(preds,
self.args.conf,
self.args.iou,
labels=self.lb,
multi_label=True,
agnostic=self.args.single_cls,
max_det=self.args.max_det,
nc=self.nc)
def init_metrics(self, model):
"""Initiate pose estimation metrics for YOLO model."""
