Merge branch 'main' into afpn

afpn
Glenn Jocher 1 year ago committed by GitHub
commit 8fce7bfc29
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 30
      docs/datasets/classify/caltech101.md
  2. 20
      docs/datasets/classify/caltech256.md
  3. 22
      docs/datasets/classify/cifar10.md
  4. 22
      docs/datasets/classify/cifar100.md
  5. 2
      docs/datasets/classify/fashion-mnist.md
  6. 28
      docs/datasets/classify/imagenet.md
  7. 28
      docs/datasets/classify/imagenet10.md
  8. 6
      docs/datasets/classify/imagenette.md
  9. 4
      docs/datasets/classify/imagewoof.md
  10. 2
      docs/datasets/classify/index.md
  11. 24
      docs/datasets/classify/mnist.md
  12. 25
      docs/datasets/detect/argoverse.md
  13. 26
      docs/datasets/detect/coco.md
  14. 26
      docs/datasets/detect/coco8.md
  15. 22
      docs/datasets/detect/globalwheat2020.md
  16. 2
      docs/datasets/detect/index.md
  17. 24
      docs/datasets/detect/objects365.md
  18. 22
      docs/datasets/detect/open-images-v7.md
  19. 22
      docs/datasets/detect/sku-110k.md
  20. 28
      docs/datasets/detect/visdrone.md
  21. 26
      docs/datasets/detect/voc.md
  22. 26
      docs/datasets/detect/xview.md
  23. 129
      docs/datasets/obb/dota-v2.md
  24. 80
      docs/datasets/obb/index.md
  25. 26
      docs/datasets/pose/coco.md
  26. 26
      docs/datasets/pose/coco8-pose.md
  27. 2
      docs/datasets/pose/index.md
  28. 26
      docs/datasets/segment/coco.md
  29. 26
      docs/datasets/segment/coco8-seg.md
  30. 2
      docs/datasets/segment/index.md
  31. 2
      docs/guides/kfold-cross-validation.md
  32. 2
      docs/models/yolov3.md
  33. 8
      docs/modes/train.md
  34. 8
      docs/reference/data/converter.md
  35. 8
      docs/reference/utils/ops.md
  36. 2
      docs/tasks/classify.md
  37. 2
      docs/tasks/detect.md
  38. 2
      docs/tasks/pose.md
  39. 2
      docs/tasks/segment.md
  40. 6
      docs/usage/python.md
  41. 3
      mkdocs.yml
  42. 2
      ultralytics/__init__.py
  43. 37
      ultralytics/cfg/datasets/DOTAv2.yaml
  44. 112
      ultralytics/data/converter.py
  45. 1
      ultralytics/data/dataset.py
  46. 46
      ultralytics/data/utils.py
  47. 14
      ultralytics/engine/results.py
  48. 112
      ultralytics/utils/ops.py

@ -39,7 +39,7 @@ To train a YOLO model on the Caltech-101 dataset for 100 epochs, you can use the
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='caltech101', epochs=100, imgsz=416)
results = model.train(data='caltech101', epochs=100, imgsz=416)
```
=== "CLI"
@ -61,17 +61,21 @@ The example showcases the variety and complexity of the objects in the Caltech-1
If you use the Caltech-101 dataset in your research or development work, please cite the following paper:
```bibtex
@article{fei2007learning,
title={Learning generative visual models from few training examples: An incremental Bayesian approach tested on 101 object categories},
author={Fei-Fei, Li and Fergus, Rob and Perona, Pietro},
journal={Computer vision and Image understanding},
volume={106},
number={1},
pages={59--70},
year={2007},
publisher={Elsevier}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@article{fei2007learning,
title={Learning generative visual models from few training examples: An incremental Bayesian approach tested on 101 object categories},
author={Fei-Fei, Li and Fergus, Rob and Perona, Pietro},
journal={Computer vision and Image understanding},
volume={106},
number={1},
pages={59--70},
year={2007},
publisher={Elsevier}
}
```
We would like to acknowledge Li Fei-Fei, Rob Fergus, and Pietro Perona for creating and maintaining the Caltech-101 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the Caltech-101 dataset and its creators, visit the [Caltech-101 dataset website](https://data.caltech.edu/records/mzrjq-6wc02).

@ -39,7 +39,7 @@ To train a YOLO model on the Caltech-256 dataset for 100 epochs, you can use the
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='caltech256', epochs=100, imgsz=416)
results = model.train(data='caltech256', epochs=100, imgsz=416)
```
=== "CLI"
@ -61,13 +61,17 @@ The example showcases the diversity and complexity of the objects in the Caltech
If you use the Caltech-256 dataset in your research or development work, please cite the following paper:
```bibtex
@article{griffin2007caltech,
title={Caltech-256 object category dataset},
author={Griffin, Gregory and Holub, Alex and Perona, Pietro},
year={2007}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@article{griffin2007caltech,
title={Caltech-256 object category dataset},
author={Griffin, Gregory and Holub, Alex and Perona, Pietro},
year={2007}
}
```
We would like to acknowledge Gregory Griffin, Alex Holub, and Pietro Perona for creating and maintaining the Caltech-256 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the

@ -42,7 +42,7 @@ To train a YOLO model on the CIFAR-10 dataset for 100 epochs with an image size
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='cifar10', epochs=100, imgsz=32)
results = model.train(data='cifar10', epochs=100, imgsz=32)
```
=== "CLI"
@ -64,13 +64,17 @@ The example showcases the variety and complexity of the objects in the CIFAR-10
If you use the CIFAR-10 dataset in your research or development work, please cite the following paper:
```bibtex
@TECHREPORT{Krizhevsky09learningmultiple,
author={Alex Krizhevsky},
title={Learning multiple layers of features from tiny images},
institution={},
year={2009}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@TECHREPORT{Krizhevsky09learningmultiple,
author={Alex Krizhevsky},
title={Learning multiple layers of features from tiny images},
institution={},
year={2009}
}
```
We would like to acknowledge Alex Krizhevsky for creating and maintaining the CIFAR-10 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the CIFAR-10 dataset and its creator, visit the [CIFAR-10 dataset website](https://www.cs.toronto.edu/~kriz/cifar.html).

@ -42,7 +42,7 @@ To train a YOLO model on the CIFAR-100 dataset for 100 epochs with an image size
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='cifar100', epochs=100, imgsz=32)
results = model.train(data='cifar100', epochs=100, imgsz=32)
```
=== "CLI"
@ -64,13 +64,17 @@ The example showcases the variety and complexity of the objects in the CIFAR-100
If you use the CIFAR-100 dataset in your research or development work, please cite the following paper:
```bibtex
@TECHREPORT{Krizhevsky09learningmultiple,
author={Alex Krizhevsky},
title={Learning multiple layers of features from tiny images},
institution={},
year={2009}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@TECHREPORT{Krizhevsky09learningmultiple,
author={Alex Krizhevsky},
title={Learning multiple layers of features from tiny images},
institution={},
year={2009}
}
```
We would like to acknowledge Alex Krizhevsky for creating and maintaining the CIFAR-100 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the CIFAR-100 dataset and its creator, visit the [CIFAR-100 dataset website](https://www.cs.toronto.edu/~kriz/cifar.html).

@ -56,7 +56,7 @@ To train a CNN model on the Fashion-MNIST dataset for 100 epochs with an image s
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='fashion-mnist', epochs=100, imgsz=28)
results = model.train(data='fashion-mnist', epochs=100, imgsz=28)
```
=== "CLI"

@ -42,7 +42,7 @@ To train a deep learning model on the ImageNet dataset for 100 epochs with an im
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='imagenet', epochs=100, imgsz=224)
results = model.train(data='imagenet', epochs=100, imgsz=224)
```
=== "CLI"
@ -64,16 +64,20 @@ The example showcases the variety and complexity of the images in the ImageNet d
If you use the ImageNet dataset in your research or development work, please cite the following paper:
```bibtex
@article{ILSVRC15,
author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
title={ImageNet Large Scale Visual Recognition Challenge},
year={2015},
journal={International Journal of Computer Vision (IJCV)},
volume={115},
number={3},
pages={211-252}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@article{ILSVRC15,
author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
title={ImageNet Large Scale Visual Recognition Challenge},
year={2015},
journal={International Journal of Computer Vision (IJCV)},
volume={115},
number={3},
pages={211-252}
}
```
We would like to acknowledge the ImageNet team, led by Olga Russakovsky, Jia Deng, and Li Fei-Fei, for creating and maintaining the ImageNet dataset as a valuable resource for the machine learning and computer vision research community. For more information about the ImageNet dataset and its creators, visit the [ImageNet website](https://www.image-net.org/).

@ -38,7 +38,7 @@ To test a deep learning model on the ImageNet10 dataset with an image size of 22
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='imagenet10', epochs=5, imgsz=224)
results = model.train(data='imagenet10', epochs=5, imgsz=224)
```
=== "CLI"
@ -59,16 +59,20 @@ The example showcases the variety and complexity of the images in the ImageNet10
If you use the ImageNet10 dataset in your research or development work, please cite the original ImageNet paper:
```bibtex
@article{ILSVRC15,
author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
title={ImageNet Large Scale Visual Recognition Challenge},
year={2015},
journal={International Journal of Computer Vision (IJCV)},
volume={115},
number={3},
pages={211-252}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@article{ILSVRC15,
author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
title={ImageNet Large Scale Visual Recognition Challenge},
year={2015},
journal={International Journal of Computer Vision (IJCV)},
volume={115},
number={3},
pages={211-252}
}
```
We would like to acknowledge the ImageNet team, led by Olga Russakovsky, Jia Deng, and Li Fei-Fei, for creating and maintaining the ImageNet dataset. The ImageNet10 dataset, while a compact subset, is a valuable resource for quick testing and debugging in the machine learning and computer vision research community. For more information about the ImageNet dataset and its creators, visit the [ImageNet website](https://www.image-net.org/).

@ -40,7 +40,7 @@ To train a model on the ImageNette dataset for 100 epochs with a standard image
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='imagenette', epochs=100, imgsz=224)
results = model.train(data='imagenette', epochs=100, imgsz=224)
```
=== "CLI"
@ -75,7 +75,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model with ImageNette160
model.train(data='imagenette160', epochs=100, imgsz=160)
results = model.train(data='imagenette160', epochs=100, imgsz=160)
```
=== "CLI"
@ -96,7 +96,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model with ImageNette320
model.train(data='imagenette320', epochs=100, imgsz=320)
results = model.train(data='imagenette320', epochs=100, imgsz=320)
```
=== "CLI"

@ -37,7 +37,7 @@ To train a CNN model on the ImageWoof dataset for 100 epochs with an image size
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='imagewoof', epochs=100, imgsz=224)
results = model.train(data='imagewoof', epochs=100, imgsz=224)
```
=== "CLI"
@ -79,6 +79,6 @@ The example showcases the subtle differences and similarities among the differen
## Citations and Acknowledgments
If you use the ImageWoof dataset in your research or development work, please make sure to acknowledge the creators of the dataset by linking to the [official dataset repository](https://github.com/fastai/imagenette). As of my knowledge cutoff in September 2021, there is no official publication specifically about ImageWoof for citation.
If you use the ImageWoof dataset in your research or development work, please make sure to acknowledge the creators of the dataset by linking to the [official dataset repository](https://github.com/fastai/imagenette).
We would like to acknowledge the FastAI team for creating and maintaining the ImageWoof dataset as a valuable resource for the machine learning and computer vision research community. For more information about the ImageWoof dataset, visit the [ImageWoof dataset repository](https://github.com/fastai/imagenette).

@ -91,7 +91,7 @@ In this example, the `train` directory contains subdirectories for each class in
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='path/to/dataset', epochs=100, imgsz=640)
results = model.train(data='path/to/dataset', epochs=100, imgsz=640)
```
=== "CLI"

@ -45,7 +45,7 @@ To train a CNN model on the MNIST dataset for 100 epochs with an image size of 3
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='mnist', epochs=100, imgsz=32)
results = model.train(data='mnist', epochs=100, imgsz=32)
```
=== "CLI"
@ -69,14 +69,18 @@ If you use the MNIST dataset in your
research or development work, please cite the following paper:
```bibtex
@article{lecun2010mnist,
title={MNIST handwritten digit database},
author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
volume={2},
year={2010}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@article{lecun2010mnist,
title={MNIST handwritten digit database},
author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
volume={2},
year={2010}
}
```
We would like to acknowledge Yann LeCun, Corinna Cortes, and Christopher J.C. Burges for creating and maintaining the MNIST dataset as a valuable resource for the machine learning and computer vision research community. For more information about the MNIST dataset and its creators, visit the [MNIST dataset website](http://yann.lecun.com/exdb/mnist/).

@ -12,7 +12,6 @@ The [Argoverse](https://www.argoverse.org/) dataset is a collection of data desi
The Argoverse dataset *.zip file required for training was removed from Amazon S3 after the shutdown of Argo AI by Ford, but we have made it available for manual download on [Google Drive](https://drive.google.com/file/d/1st9qW3BeIwQsnR0t8mRpvbsSWIo16ACi/view?usp=drive_link).
## Key Features
- Argoverse contains over 290K labeled 3D object tracks and 5 million object instances across 1,263 distinct scenes.
@ -57,7 +56,7 @@ To train a YOLOv8n model on the Argoverse dataset for 100 epochs with an image s
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='Argoverse.yaml', epochs=100, imgsz=640)
results = model.train(data='Argoverse.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -81,14 +80,18 @@ The example showcases the variety and complexity of the data in the Argoverse da
If you use the Argoverse dataset in your research or development work, please cite the following paper:
```bibtex
@inproceedings{chang2019argoverse,
title={Argoverse: 3D Tracking and Forecasting with Rich Maps},
author={Chang, Ming-Fang and Lambert, John and Sangkloy, Patsorn and Singh, Jagjeet and Bak, Slawomir and Hartnett, Andrew and Wang, Dequan and Carr, Peter and Lucey, Simon and Ramanan, Deva and others},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={8748--8757},
year={2019}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@inproceedings{chang2019argoverse,
title={Argoverse: 3D Tracking and Forecasting with Rich Maps},
author={Chang, Ming-Fang and Lambert, John and Sangkloy, Patsorn and Singh, Jagjeet and Bak, Slawomir and Hartnett, Andrew and Wang, Dequan and Carr, Peter and Lucey, Simon and Ramanan, Deva and others},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={8748--8757},
year={2019}
}
```
We would like to acknowledge Argo AI for creating and maintaining the Argoverse dataset as a valuable resource for the autonomous driving research community. For more information about the Argoverse dataset and its creators, visit the [Argoverse dataset website](https://www.argoverse.org/).

@ -52,7 +52,7 @@ To train a YOLOv8n model on the COCO dataset for 100 epochs with an image size o
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco.yaml', epochs=100, imgsz=640)
results = model.train(data='coco.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -76,15 +76,19 @@ The example showcases the variety and complexity of the images in the COCO datas
If you use the COCO dataset in your research or development work, please cite the following paper:
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).

@ -42,7 +42,7 @@ To train a YOLOv8n model on the COCO8 dataset for 100 epochs with an image size
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco8.yaml', epochs=100, imgsz=640)
results = model.train(data='coco8.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -66,15 +66,19 @@ The example showcases the variety and complexity of the images in the COCO8 data
If you use the COCO dataset in your research or development work, please cite the following paper:
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).

@ -51,7 +51,7 @@ To train a YOLOv8n model on the Global Wheat Head Dataset for 100 epochs with an
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='GlobalWheat2020.yaml', epochs=100, imgsz=640)
results = model.train(data='GlobalWheat2020.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -75,13 +75,17 @@ The example showcases the variety and complexity of the data in the Global Wheat
If you use the Global Wheat Head Dataset in your research or development work, please cite the following paper:
```bibtex
@article{david2020global,
title={Global Wheat Head Detection (GWHD) Dataset: A Large and Diverse Dataset of High-Resolution RGB-Labelled Images to Develop and Benchmark Wheat Head Detection Methods},
author={David, Etienne and Madec, Simon and Sadeghi-Tehran, Pouria and Aasen, Helge and Zheng, Bangyou and Liu, Shouyang and Kirchgessner, Norbert and Ishikawa, Goro and Nagasawa, Koichi and Badhon, Minhajul and others},
journal={arXiv preprint arXiv:2005.02162},
year={2020}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@article{david2020global,
title={Global Wheat Head Detection (GWHD) Dataset: A Large and Diverse Dataset of High-Resolution RGB-Labelled Images to Develop and Benchmark Wheat Head Detection Methods},
author={David, Etienne and Madec, Simon and Sadeghi-Tehran, Pouria and Aasen, Helge and Zheng, Bangyou and Liu, Shouyang and Kirchgessner, Norbert and Ishikawa, Goro and Nagasawa, Koichi and Badhon, Minhajul and others},
journal={arXiv preprint arXiv:2005.02162},
year={2020}
}
```
We would like to acknowledge the researchers and institutions that contributed to the creation and maintenance of the Global Wheat Head Dataset as a valuable resource for the plant phenotyping and crop management research community. For more information about the dataset and its creators, visit the [Global Wheat Head Dataset website](http://www.global-wheat.com/).

@ -59,7 +59,7 @@ Here's how you can use these formats to train your model:
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco8.yaml', epochs=100, imgsz=640)
results = model.train(data='coco8.yaml', epochs=100, imgsz=640)
```
=== "CLI"

@ -51,7 +51,7 @@ To train a YOLOv8n model on the Objects365 dataset for 100 epochs with an image
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='Objects365.yaml', epochs=100, imgsz=640)
results = model.train(data='Objects365.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -75,14 +75,18 @@ The example showcases the variety and complexity of the data in the Objects365 d
If you use the Objects365 dataset in your research or development work, please cite the following paper:
```bibtex
@inproceedings{shao2019objects365,
title={Objects365: A Large-scale, High-quality Dataset for Object Detection},
author={Shao, Shuai and Li, Zeming and Zhang, Tianyuan and Peng, Chao and Yu, Gang and Li, Jing and Zhang, Xiangyu and Sun, Jian},
booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
pages={8425--8434},
year={2019}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@inproceedings{shao2019objects365,
title={Objects365: A Large-scale, High-quality Dataset for Object Detection},
author={Shao, Shuai and Li, Zeming and Zhang, Tianyuan and Peng, Chao and Yu, Gang and Li, Jing and Zhang, Xiangyu and Sun, Jian},
booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
pages={8425--8434},
year={2019}
}
```
We would like to acknowledge the team of researchers who created and maintain the Objects365 dataset as a valuable resource for the computer vision research community. For more information about the Objects365 dataset and its creators, visit the [Objects365 dataset website](https://www.objects365.org/).

@ -70,7 +70,7 @@ To train a YOLOv8n model on the Open Images V7 dataset for 100 epochs with an im
model = YOLO('yolov8n.pt')
# Train the model on the Open Images V7 dataset
model.train(data='open-images-v7.yaml', epochs=100, imgsz=640)
results = model.train(data='open-images-v7.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -94,13 +94,17 @@ Researchers can gain invaluable insights into the array of computer vision chall
For those employing Open Images V7 in their work, it's prudent to cite the relevant papers and acknowledge the creators:
```bibtex
@article{OpenImages,
author = {Alina Kuznetsova and Hassan Rom and Neil Alldrin and Jasper Uijlings and Ivan Krasin and Jordi Pont-Tuset and Shahab Kamali and Stefan Popov and Matteo Malloci and Alexander Kolesnikov and Tom Duerig and Vittorio Ferrari},
title = {The Open Images Dataset V4: Unified image classification, object detection, and visual relationship detection at scale},
year = {2020},
journal = {IJCV}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@article{OpenImages,
author = {Alina Kuznetsova and Hassan Rom and Neil Alldrin and Jasper Uijlings and Ivan Krasin and Jordi Pont-Tuset and Shahab Kamali and Stefan Popov and Matteo Malloci and Alexander Kolesnikov and Tom Duerig and Vittorio Ferrari},
title = {The Open Images Dataset V4: Unified image classification, object detection, and visual relationship detection at scale},
year = {2020},
journal = {IJCV}
}
```
A heartfelt acknowledgment goes out to the Google AI team for creating and maintaining the Open Images V7 dataset. For a deep dive into the dataset and its offerings, navigate to the [official Open Images V7 website](https://storage.googleapis.com/openimages/web/index.html).

@ -53,7 +53,7 @@ To train a YOLOv8n model on the SKU-110K dataset for 100 epochs with an image si
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='SKU-110K.yaml', epochs=100, imgsz=640)
results = model.train(data='SKU-110K.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -77,13 +77,17 @@ The example showcases the variety and complexity of the data in the SKU-110k dat
If you use the SKU-110k dataset in your research or development work, please cite the following paper:
```bibtex
@inproceedings{goldman2019dense,
author = {Eran Goldman and Roei Herzig and Aviv Eisenschtat and Jacob Goldberger and Tal Hassner},
title = {Precise Detection in Densely Packed Scenes},
booktitle = {Proc. Conf. Comput. Vision Pattern Recognition (CVPR)},
year = {2019}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@inproceedings{goldman2019dense,
author = {Eran Goldman and Roei Herzig and Aviv Eisenschtat and Jacob Goldberger and Tal Hassner},
title = {Precise Detection in Densely Packed Scenes},
booktitle = {Proc. Conf. Comput. Vision Pattern Recognition (CVPR)},
year = {2019}
}
```
We would like to acknowledge Eran Goldman et al. for creating and maintaining the SKU-110k dataset as a valuable resource for the computer vision research community. For more information about the SKU-110k dataset and its creators, visit the [SKU-110k dataset GitHub repository](https://github.com/eg4000/SKU110K_CVPR19).

@ -49,7 +49,7 @@ To train a YOLOv8n model on the VisDrone dataset for 100 epochs with an image si
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='VisDrone.yaml', epochs=100, imgsz=640)
results = model.train(data='VisDrone.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -73,16 +73,20 @@ The example showcases the variety and complexity of the data in the VisDrone dat
If you use the VisDrone dataset in your research or development work, please cite the following paper:
```bibtex
@ARTICLE{9573394,
author={Zhu, Pengfei and Wen, Longyin and Du, Dawei and Bian, Xiao and Fan, Heng and Hu, Qinghua and Ling, Haibin},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
title={Detection and Tracking Meet Drones Challenge},
year={2021},
volume={},
number={},
pages={1-1},
doi={10.1109/TPAMI.2021.3119563}}
```
!!! note ""
=== "BibTeX"
```bibtex
@ARTICLE{9573394,
author={Zhu, Pengfei and Wen, Longyin and Du, Dawei and Bian, Xiao and Fan, Heng and Hu, Qinghua and Ling, Haibin},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
title={Detection and Tracking Meet Drones Challenge},
year={2021},
volume={},
number={},
pages={1-1},
doi={10.1109/TPAMI.2021.3119563}}
```
We would like to acknowledge the AISKYEYE team at the Lab of Machine Learning and Data Mining, Tianjin University, China, for creating and maintaining the VisDrone dataset as a valuable resource for the drone-based computer vision research community. For more information about the VisDrone dataset and its creators, visit the [VisDrone Dataset GitHub repository](https://github.com/VisDrone/VisDrone-Dataset).

@ -52,7 +52,7 @@ To train a YOLOv8n model on the VOC dataset for 100 epochs with an image size of
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='VOC.yaml', epochs=100, imgsz=640)
results = model.train(data='VOC.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -77,15 +77,19 @@ The example showcases the variety and complexity of the images in the VOC datase
If you use the VOC dataset in your research or development work, please cite the following paper:
```bibtex
@misc{everingham2010pascal,
title={The PASCAL Visual Object Classes (VOC) Challenge},
author={Mark Everingham and Luc Van Gool and Christopher K. I. Williams and John Winn and Andrew Zisserman},
year={2010},
eprint={0909.5206},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@misc{everingham2010pascal,
title={The PASCAL Visual Object Classes (VOC) Challenge},
author={Mark Everingham and Luc Van Gool and Christopher K. I. Williams and John Winn and Andrew Zisserman},
year={2010},
eprint={0909.5206},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
We would like to acknowledge the PASCAL VOC Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the VOC dataset and its creators, visit the [PASCAL VOC dataset website](http://host.robots.ox.ac.uk/pascal/VOC/).

@ -55,7 +55,7 @@ To train a model on the xView dataset for 100 epochs with an image size of 640,
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='xView.yaml', epochs=100, imgsz=640)
results = model.train(data='xView.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -79,15 +79,19 @@ The example showcases the variety and complexity of the data in the xView datase
If you use the xView dataset in your research or development work, please cite the following paper:
```bibtex
@misc{lam2018xview,
title={xView: Objects in Context in Overhead Imagery},
author={Darius Lam and Richard Kuzma and Kevin McGee and Samuel Dooley and Michael Laielli and Matthew Klaric and Yaroslav Bulatov and Brendan McCord},
year={2018},
eprint={1802.07856},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@misc{lam2018xview,
title={xView: Objects in Context in Overhead Imagery},
author={Darius Lam and Richard Kuzma and Kevin McGee and Samuel Dooley and Michael Laielli and Matthew Klaric and Yaroslav Bulatov and Brendan McCord},
year={2018},
eprint={1802.07856},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
We would like to acknowledge the [Defense Innovation Unit](https://www.diu.mil/) (DIU) and the creators of the xView dataset for their valuable contribution to the computer vision research community. For more information about the xView dataset and its creators, visit the [xView dataset website](http://xviewdataset.org/).

@ -0,0 +1,129 @@
---
comments: true
description: Delve into DOTA v2, an Oriented Bounding Box (OBB) aerial imagery dataset with 1.7 million instances and 11,268 images.
keywords: DOTA v2, object detection, aerial images, computer vision, deep learning, annotations, oriented bounding boxes, OBB
---
# DOTA v2 Dataset with OBB
[DOTA v2](https://captain-whu.github.io/DOTA/index.html) stands as a specialized dataset, emphasizing object detection in aerial images. Originating from the DOTA series of datasets, it offers annotated images capturing a diverse array of aerial scenes with Oriented Bounding Boxes (OBB).
![DOTA v2 classes visual](https://user-images.githubusercontent.com/26833433/259461765-72fdd0d8-266b-44a9-8199-199329bf5ca9.jpg)
## Key Features
- Collection from various sensors and platforms, with image sizes ranging from 800 × 800 to 20,000 × 20,000 pixels.
- Features more than 1.7M Oriented Bounding Boxes across 18 categories.
- Encompasses multiscale object detection.
- Instances are annotated by experts using arbitrary (8 d.o.f.) quadrilaterals, capturing objects of different scales, orientations, and shapes.
## Dataset Versions
### DOTA-v1.0
- Contains 15 common categories.
- Comprises 2,806 images with 188,282 instances.
- Split ratios: 1/2 for training, 1/6 for validation, and 1/3 for testing.
### DOTA-v1.5
- Incorporates the same images as DOTA-v1.0.
- Very small instances (less than 10 pixels) are also annotated.
- Addition of a new category: "container crane".
- A total of 403,318 instances.
- Released for the DOAI Challenge 2019 on Object Detection in Aerial Images.
### DOTA-v2.0
- Collections from Google Earth, GF-2 Satellite, and other aerial images.
- Contains 18 common categories.
- Comprises 11,268 images with a whopping 1,793,658 instances.
- New categories introduced: "airport" and "helipad".
- Image splits:
- Training: 1,830 images with 268,627 instances.
- Validation: 593 images with 81,048 instances.
- Test-dev: 2,792 images with 353,346 instances.
- Test-challenge: 6,053 images with 1,090,637 instances.
## Dataset Structure
DOTA v2 exhibits a structured layout tailored for OBB object detection challenges:
- **Images**: A vast collection of high-resolution aerial images capturing diverse terrains and structures.
- **Oriented Bounding Boxes**: Annotations in the form of rotated rectangles encapsulating objects irrespective of their orientation, ideal for capturing objects like airplanes, ships, and buildings.
## Applications
DOTA v2 serves as a benchmark for training and evaluating models specifically tailored for aerial image analysis. With the inclusion of OBB annotations, it provides a unique challenge, enabling the development of specialized object detection models that cater to aerial imagery's nuances.
## Dataset YAML
Typically, datasets incorporate a YAML (Yet Another Markup Language) file detailing the dataset's configuration. For DOTA v2, this is the `DOTAv2.yaml` file, which defines the dataset root path, the train/val/test image directories, the class names, and the download URL. For accurate paths and configurations, it's vital to consult the dataset's official repository or documentation.
!!! example "DOTAv2.yaml"
```yaml
--8<-- "ultralytics/cfg/datasets/DOTAv2.yaml"
```
## Usage
To train a model on the DOTA v2 dataset, you can utilize the following code snippets. Always refer to your model's documentation for a thorough list of available arguments.
!!! warning
Please note that all images and associated annotations in the DOTAv2 dataset can be used for academic purposes, but commercial use is prohibited. Your understanding and respect for the dataset creators' wishes are greatly appreciated!
!!! example "Train Example"
=== "Python"
```python
from ultralytics import YOLO
# Create a new YOLOv8n-OBB model from scratch
model = YOLO('yolov8n-obb.yaml')
# Train the model on the DOTAv2 dataset
results = model.train(data='DOTAv2.yaml', epochs=100, imgsz=640)
```
=== "CLI"
```bash
# Train a new YOLOv8n-OBB model on the DOTAv2 dataset
yolo detect train data=DOTAv2.yaml model=yolov8n.pt epochs=100 imgsz=640
```
## Sample Data and Annotations
Having a glance at the dataset illustrates its depth:
![Dataset sample image](https://captain-whu.github.io/DOTA/images/instances-DOTA.jpg)
- **DOTA v2**: This snapshot underlines the complexity of aerial scenes and the significance of Oriented Bounding Box annotations, capturing objects in their natural orientation.
The dataset's richness offers invaluable insights into object detection challenges exclusive to aerial imagery.
## Citations and Acknowledgments
For those leveraging DOTA v2 in their endeavors, it's pertinent to cite the relevant research papers:
!!! note ""
=== "BibTeX"
```bibtex
@article{9560031,
author={Ding, Jian and Xue, Nan and Xia, Gui-Song and Bai, Xiang and Yang, Wen and Yang, Michael and Belongie, Serge and Luo, Jiebo and Datcu, Mihai and Pelillo, Marcello and Zhang, Liangpei},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
title={Object Detection in Aerial Images: A Large-Scale Benchmark and Challenges},
year={2021},
volume={},
number={},
pages={1-1},
doi={10.1109/TPAMI.2021.3117983}
}
```
A special note of gratitude to the team behind DOTA v2 for their commendable effort in curating this dataset. For an exhaustive understanding of the dataset and its nuances, please visit the [official DOTA v2 website](https://captain-whu.github.io/DOTA/index.html).

@ -0,0 +1,80 @@
---
comments: true
description: Dive deep into various oriented bounding box (OBB) dataset formats compatible with the Ultralytics YOLO model. Grasp the nuances of using and converting datasets to this format.
keywords: Ultralytics, YOLO, oriented bounding boxes, OBB, dataset formats, label formats, DOTA v2, data conversion
---
# Oriented Bounding Box Datasets Overview
Training a precise object detection model with oriented bounding boxes (OBB) requires a thorough dataset. This guide elucidates the various OBB dataset formats compatible with the Ultralytics YOLO model, offering insights into their structure, application, and methods for format conversions.
## Supported OBB Dataset Formats
### YOLO OBB Format
The YOLO OBB format designates bounding boxes by their four corner points with coordinates normalized between 0 and 1. It follows this format:
```bash
class_index x1 y1 x2 y2 x3 y3 x4 y4
```
Internally, YOLO processes losses and outputs in the `xywhr` format, which represents the bounding box's center point (xy), width, height, and rotation.
<p align="center"><img width="800" src="https://user-images.githubusercontent.com/26833433/259471881-59020fe2-09a4-4dcc-acce-9b0f7cfa40ee.png"></p>
An example of a `*.txt` label file for the above image, which contains an object of class `0` in OBB format, could look like:
```bash
0 0.780811 0.743961 0.782371 0.74686 0.777691 0.752174 0.776131 0.749758
```
## Usage
To train a model using these OBB formats:
!!! example ""
=== "Python"
```python
from ultralytics import YOLO
# Create a new YOLOv8n-OBB model from scratch
model = YOLO('yolov8n-obb.yaml')
# Train the model on the DOTAv2 dataset
results = model.train(data='DOTAv2.yaml', epochs=100, imgsz=640)
```
=== "CLI"
```bash
# Train a new YOLOv8n-OBB model on the DOTAv2 dataset
yolo detect train data=DOTAv2.yaml model=yolov8n.pt epochs=100 imgsz=640
```
## Supported Datasets
Currently, the following datasets with Oriented Bounding Boxes are supported:
- [**DOTA v2**](./dota-v2.md): DOTA (A Large-scale Dataset for Object Detection in Aerial Images) version 2, emphasizes detection from aerial perspectives and contains oriented bounding boxes with 1.7 million instances and 11,268 images.
### Incorporating your own OBB dataset
For those looking to introduce their own datasets with oriented bounding boxes, ensure compatibility with the "YOLO OBB format" mentioned above. Convert your annotations to this required format and detail the paths, classes, and class names in a corresponding YAML configuration file.
## Convert Label Formats
### DOTA Dataset Format to YOLO OBB Format
Transitioning labels from the DOTA dataset format to the YOLO OBB format can be achieved with this script:
```python
from ultralytics.data.converter import convert_dota_to_yolo_obb
convert_dota_to_yolo_obb('path/to/DOTA')
```
This conversion mechanism is instrumental for datasets in the DOTA format, ensuring alignment with the Ultralytics YOLO OBB format.
It's imperative to validate the compatibility of the dataset with your model and adhere to the necessary format conventions. Properly structured datasets are pivotal for training efficient object detection models with oriented bounding boxes.

@ -53,7 +53,7 @@ To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 epochs with an im
model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco-pose.yaml', epochs=100, imgsz=640)
results = model.train(data='coco-pose.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -77,15 +77,19 @@ The example showcases the variety and complexity of the images in the COCO-Pose
If you use the COCO-Pose dataset in your research or development work, please cite the following paper:
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO-Pose dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).

@ -42,7 +42,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an i
model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco8-pose.yaml', epochs=100, imgsz=640)
results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -66,15 +66,19 @@ The example showcases the variety and complexity of the images in the COCO8-Pose
If you use the COCO dataset in your research or development work, please cite the following paper:
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).

@ -78,7 +78,7 @@ For example if we assume five keypoints of facial landmark: [left eye, right eye
model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco128-pose.yaml', epochs=100, imgsz=640)
results = model.train(data='coco128-pose.yaml', epochs=100, imgsz=640)
```
=== "CLI"

@ -52,7 +52,7 @@ To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an imag
model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco-seg.yaml', epochs=100, imgsz=640)
results = model.train(data='coco-seg.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -76,15 +76,19 @@ The example showcases the variety and complexity of the images in the COCO-Seg d
If you use the COCO-Seg dataset in your research or development work, please cite the original COCO paper and acknowledge the extension to COCO-Seg:
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
We extend our thanks to the COCO Consortium for creating and maintaining this invaluable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).

@ -42,7 +42,7 @@ To train a YOLOv8n-seg model on the COCO8-Seg dataset for 100 epochs with an ima
model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco8-seg.yaml', epochs=100, imgsz=640)
results = model.train(data='coco8-seg.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -66,15 +66,19 @@ The example showcases the variety and complexity of the images in the COCO8-Seg
If you use the COCO dataset in your research or development work, please cite the following paper:
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
!!! note ""
=== "BibTeX"
```bibtex
@misc{lin2015microsoft,
title={Microsoft COCO: Common Objects in Context},
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
year={2015},
eprint={1405.0312},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).

@ -79,7 +79,7 @@ The `train` and `val` fields specify the paths to the directories containing the
model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training)
# Train the model
model.train(data='coco128-seg.yaml', epochs=100, imgsz=640)
results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640)
```
=== "CLI"

@ -246,7 +246,7 @@ fold_lbl_distrb.to_csv(save_path / "kfold_label_distribution.csv")
results = {}  # per-fold metrics, keyed by fold index
for k in range(ksplit):
    dataset_yaml = ds_yamls[k]
    # Do not bind the return value to `results`: that name is the per-fold dict filled in on the next line
    model.train(data=dataset_yaml, *args, **kwargs)  # Include any training arguments
    results[k] = model.metrics  # save output metrics for further analysis
```

@ -55,7 +55,7 @@ You can use YOLOv3 for object detection tasks using the Ultralytics repository.
This example provides simple inference code for YOLOv3. For more options including handling inference results see [Predict](../modes/predict.md) mode. For using YOLOv3 with additional modes see [Train](../modes/train.md), [Val](../modes/val.md) and [Export](../modes/export.md).
=== "Python"
=== "Python"
PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in python:

@ -31,7 +31,7 @@ Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. See Argum
model = YOLO('yolov8n.yaml').load('yolov8n.pt') # build from YAML and transfer weights
# Train the model
model.train(data='coco128.yaml', epochs=100, imgsz=640)
results = model.train(data='coco128.yaml', epochs=100, imgsz=640)
```
=== "CLI"
@ -61,7 +61,7 @@ The training device can be specified using the `device` argument. If no argument
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
# Train the model with 2 GPUs
model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1])
results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1])
```
=== "CLI"
@ -87,7 +87,7 @@ To enable training on Apple M1 and M2 chips, you should specify 'mps' as your de
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
# Train the model with MPS
model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps')
results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps')
```
=== "CLI"
@ -119,7 +119,7 @@ Below is an example of how to resume an interrupted training using Python and vi
model = YOLO('path/to/last.pt') # load a partially trained model
# Resume training
model.train(resume=True)
results = model.train(resume=True)
```
=== "CLI"

@ -18,17 +18,17 @@ keywords: Ultralytics, Data Converter, coco91_to_coco80_class, merge_multi_segme
<br><br>
---
## ::: ultralytics.data.converter.rle2polygon
## ::: ultralytics.data.converter.convert_dota_to_yolo_obb
<br><br>
---
## ::: ultralytics.data.converter.min_index
## ::: ultralytics.data.converter.rle2polygon
<br><br>
---
## ::: ultralytics.data.converter.merge_multi_segment
## ::: ultralytics.data.converter.min_index
<br><br>
---
## ::: ultralytics.data.converter.delete_dsstore
## ::: ultralytics.data.converter.merge_multi_segment
<br><br>

@ -77,6 +77,14 @@ keywords: Ultralytics YOLO, Utility Operations, segment2box, make_divisible, cli
## ::: ultralytics.utils.ops.ltwh2xywh
<br><br>
---
## ::: ultralytics.utils.ops.xyxyxyxy2xywhr
<br><br>
---
## ::: ultralytics.utils.ops.xywhr2xyxyxyxy
<br><br>
---
## ::: ultralytics.utils.ops.ltwh2xyxy
<br><br>

@ -59,7 +59,7 @@ see the [Configuration](../usage/cfg.md) page.
model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # build from YAML and transfer weights
# Train the model
model.train(data='mnist160', epochs=100, imgsz=64)
results = model.train(data='mnist160', epochs=100, imgsz=64)
```
=== "CLI"

@ -51,7 +51,7 @@ Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. For a ful
model = YOLO('yolov8n.yaml').load('yolov8n.pt') # build from YAML and transfer weights
# Train the model
model.train(data='coco128.yaml', epochs=100, imgsz=640)
results = model.train(data='coco128.yaml', epochs=100, imgsz=640)
```
=== "CLI"

@ -62,7 +62,7 @@ Train a YOLOv8-pose model on the COCO128-pose dataset.
model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # build from YAML and transfer weights
# Train the model
model.train(data='coco8-pose.yaml', epochs=100, imgsz=640)
results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640)
```
=== "CLI"

@ -59,7 +59,7 @@ arguments see the [Configuration](../usage/cfg.md) page.
model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # build from YAML and transfer weights
# Train the model
model.train(data='coco128-seg.yaml', epochs=100, imgsz=640)
results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640)
```
=== "CLI"

@ -52,7 +52,7 @@ accurately predict the classes and locations of objects in an image.
from ultralytics import YOLO
model = YOLO('yolov8n.pt') # pass any model type
model.train(epochs=5)
results = model.train(epochs=5)
```
=== "From scratch"
@ -60,13 +60,13 @@ accurately predict the classes and locations of objects in an image.
from ultralytics import YOLO
model = YOLO('yolov8n.yaml')
model.train(data='coco128.yaml', epochs=5)
results = model.train(data='coco128.yaml', epochs=5)
```
=== "Resume"
```python
model = YOLO("last.pt")
model.train(resume=True)
results = model.train(resume=True)
```
[Train Examples](../modes/train.md){ .md-button .md-button--primary}

@ -207,6 +207,9 @@ nav:
- Imagenette: datasets/classify/imagenette.md
- Imagewoof: datasets/classify/imagewoof.md
- MNIST: datasets/classify/mnist.md
- Oriented Bounding Boxes (OBB):
- datasets/obb/index.md
- DOTAv2: datasets/obb/dota-v2.md
- Multi-Object Tracking:
- datasets/track/index.md
- Guides:

@ -1,6 +1,6 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
__version__ = '8.0.150'
__version__ = '8.0.151'
from ultralytics.hub import start
from ultralytics.models import RTDETR, SAM, YOLO

@ -0,0 +1,37 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DOTA 2.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv2.yaml
# parent
# ├── ultralytics
# └── datasets
# └── DOTAv2 ← downloads here (2GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/DOTAv2 # dataset root dir
train: images/train # train images (relative to 'path') 1411 images
val: images/val # val images (relative to 'path') 458 images
test: images/test # test images (optional) 937 images
# Classes for DOTA 2.0
names:
0: plane
1: ship
2: storage tank
3: baseball diamond
4: tennis court
5: basketball court
6: ground track field
7: harbor
8: bridge
9: large vehicle
10: small vehicle
11: helicopter
12: roundabout
13: soccer ball field
14: swimming pool
15: container crane
16: airport
17: helipad
# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv2.zip

@ -117,6 +117,97 @@ def convert_coco(labels_dir='../coco/annotations/', use_segments=False, use_keyp
file.write(('%g ' * len(line)).rstrip() % line + '\n')
def convert_dota_to_yolo_obb(dota_root_path: str):
    """
    Converts DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format.

    The function processes images in the 'train' and 'val' folders of the DOTA dataset. For each image, it reads the
    associated label from the original labels directory and writes new labels in YOLO OBB format to a new directory.
    Each output line is `class_index x1 y1 x2 y2 x3 y3 x4 y4`, with x divided by the image width and y by the image
    height.

    Args:
        dota_root_path (str): The root directory path of the DOTA dataset.

    Raises:
        KeyError: If a label line names a class that is not present in the class mapping below.

    Example:
        ```python
        from ultralytics.data.converter import convert_dota_to_yolo_obb

        convert_dota_to_yolo_obb('path/to/DOTA')
        ```

    Notes:
        The directory structure assumed for the DOTA dataset:
            - DOTA
                - images
                    - train
                    - val
                - labels
                    - train_original
                    - val_original

        After the function execution, the new labels will be saved in:
            - DOTA
                - labels
                    - train
                    - val
    """
    dota_root_path = Path(dota_root_path)

    # Class names to indices mapping. Keys must contain no whitespace: label lines are split on whitespace, so a
    # key with a space could never match the class token.
    class_mapping = {
        'plane': 0,
        'ship': 1,
        'storage-tank': 2,
        'baseball-diamond': 3,
        'tennis-court': 4,
        'basketball-court': 5,
        'ground-track-field': 6,
        'harbor': 7,
        'bridge': 8,
        'large-vehicle': 9,
        'small-vehicle': 10,
        'helicopter': 11,
        'roundabout': 12,
        'soccer-ball-field': 13,
        'swimming-pool': 14,
        'container-crane': 15,
        'airport': 16,
        'helipad': 17}

    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
        """Rewrite one DOTA label file as a YOLO OBB label file with normalized corner coordinates."""
        orig_label_path = orig_label_dir / f'{image_name}.txt'
        save_path = save_dir / f'{image_name}.txt'

        with orig_label_path.open('r') as f, save_path.open('w') as g:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 9:
                    continue  # skip lines without 8 coordinates plus a class name
                class_name = parts[8]
                class_idx = class_mapping[class_name]
                coords = [float(p) for p in parts[:8]]
                # Even positions are x values (scaled by width), odd positions are y values (scaled by height)
                normalized_coords = [
                    coord / image_width if i % 2 == 0 else coord / image_height for i, coord in enumerate(coords)]
                formatted_coords = [f'{coord:.6g}' for coord in normalized_coords]
                g.write(f"{class_idx} {' '.join(formatted_coords)}\n")

    for phase in ['train', 'val']:
        image_dir = dota_root_path / 'images' / phase
        orig_label_dir = dota_root_path / 'labels' / f'{phase}_original'
        save_dir = dota_root_path / 'labels' / phase

        save_dir.mkdir(parents=True, exist_ok=True)

        image_paths = list(image_dir.iterdir())
        for image_path in tqdm(image_paths, desc=f'Processing {phase} images'):
            if image_path.suffix != '.png':
                continue  # only .png images are converted
            image_name_without_ext = image_path.stem
            # NOTE(review): cv2.imread returns None for unreadable files, which would fail on .shape below
            img = cv2.imread(str(image_path))
            h, w = img.shape[:2]
            convert_label(image_name_without_ext, w, h, orig_label_dir, save_dir)
def rle2polygon(segmentation):
"""
Convert Run-Length Encoding (RLE) mask to polygon coordinates.
@ -209,24 +300,3 @@ def merge_multi_segment(segments):
nidx = abs(idx[1] - idx[0])
s.append(segments[i][nidx:])
return s
def delete_dsstore(path='../datasets'):
    """
    Delete Apple .DS_Store files in the specified directory and its subdirectories.

    Args:
        path (str): Root directory to search recursively. Defaults to '../datasets'.

    Note:
        The pattern accepts both the '.DS_Store' spelling and the legacy '.DS_store' spelling. A literal
        '.DS_store' pattern only finds '.DS_Store' files on case-insensitive filesystems.
    """
    from pathlib import Path

    # The character class makes the match independent of filesystem case sensitivity
    files = list(Path(path).rglob('.DS_[Ss]tore'))
    print(files)
    for f in files:
        f.unlink()
if __name__ == '__main__':
    # Script entry point: choose which annotation source to convert
    source = 'COCO'

    if source == 'COCO':
        # Convert the *.json files in the annotations directory with use_keypoints enabled;
        # use_segments and cls91to80 are disabled
        convert_coco(labels_dir='../datasets/coco/annotations',
                     use_segments=False,
                     use_keypoints=True,
                     cls91to80=False)

@ -126,6 +126,7 @@ class YOLODataset(BaseDataset):
# Read cache
[cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items
labels = cache['labels']
assert len(labels), f'No valid labels found, please check your dataset. {HELP_URL}'
self.im_files = [lb['im_file'] for lb in labels] # update im_files
# Check if the dataset is all boxes or all segments

@ -24,7 +24,7 @@ from ultralytics.utils.checks import check_file, check_font, is_ascii
from ultralytics.utils.downloads import download, safe_download, unzip_file
from ultralytics.utils.ops import segments2boxes
HELP_URL = 'See https://docs.ultralytics.com/yolov5/tutorials/train_custom_data'
HELP_URL = 'See https://docs.ultralytics.com/datasets/detect for dataset formatting guidance.'
IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm' # image suffixes
VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv', 'webm' # video suffixes
PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders
@ -289,9 +289,6 @@ def check_cls_dataset(dataset: str, split=''):
- 'test' (Path): The directory path containing the test set of the dataset.
- 'nc' (int): The number of classes in the dataset.
- 'names' (dict): A dictionary of class names in the dataset.
Raises:
FileNotFoundError: If the specified dataset is not found and cannot be downloaded.
"""
dataset = Path(dataset)
@ -329,13 +326,16 @@ class HUBDatasetStats():
task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'.
autodownload (bool): Attempt to download dataset if not found locally. Default is False.
Usage
Example:
```python
from ultralytics.data.utils import HUBDatasetStats
stats = HUBDatasetStats('/Users/glennjocher/Downloads/coco8.zip', task='detect') # detect dataset
stats = HUBDatasetStats('/Users/glennjocher/Downloads/coco8-seg.zip', task='segment') # segment dataset
stats = HUBDatasetStats('/Users/glennjocher/Downloads/coco8-pose.zip', task='pose') # pose dataset
stats = HUBDatasetStats('path/to/coco8.zip', task='detect') # detect dataset
stats = HUBDatasetStats('path/to/coco8-seg.zip', task='segment') # segment dataset
stats = HUBDatasetStats('path/to/coco8-pose.zip', task='pose') # pose dataset
stats.get_json(save=False)
stats.process_images()
```
"""
def __init__(self, path='coco128.yaml', task='detect', autodownload=False):
@ -459,11 +459,14 @@ def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
max_dim (int, optional): The maximum dimension (width or height) of the output image. Default is 1920 pixels.
quality (int, optional): The image compression quality as a percentage. Default is 50%.
Usage:
Example:
```python
from pathlib import Path
from ultralytics.data.utils import compress_one_image
for f in Path('/Users/glennjocher/Downloads/dataset').rglob('*.jpg'):
for f in Path('path/to/dataset').rglob('*.jpg'):
compress_one_image(f)
```
"""
try: # use PIL
im = Image.open(f)
@ -488,9 +491,12 @@ def delete_dsstore(path):
Args:
path (str, optional): The directory path where the ".DS_store" files should be deleted.
Usage:
Example:
```python
from ultralytics.data.utils import delete_dsstore
delete_dsstore('/Users/glennjocher/Downloads/dataset')
delete_dsstore('path/to/dir')
```
Note:
".DS_store" files are created by the Apple operating system and contain metadata about folders and files. They
@ -505,17 +511,18 @@ def delete_dsstore(path):
def zip_directory(dir, use_zipfile_library=True):
"""
Zips a directory and saves the archive to the specified output path.
Zips a directory and saves the archive to the specified output path. Equivalent to 'zip -r coco8.zip coco8/'
Args:
dir (str): The path to the directory to be zipped.
use_zipfile_library (bool): Whether to use zipfile library or shutil for zipping.
Usage:
Example:
```python
from ultralytics.data.utils import zip_directory
zip_directory('/Users/glennjocher/Downloads/playground')
zip -r coco8-pose.zip coco8-pose
zip_directory('/path/to/dir')
```
"""
delete_dsstore(dir)
if use_zipfile_library:
@ -538,9 +545,12 @@ def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), ann
weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0).
annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False.
Usage:
from utils.dataloaders import autosplit
Example:
```python
from ultralytics.data.utils import autosplit
autosplit()
```
"""
path = Path(path) # images dir

@ -354,12 +354,14 @@ class Results(SimpleClass):
results = []
data = self.boxes.data.cpu().tolist()
h, w = self.orig_shape if normalize else (1, 1)
for i, row in enumerate(data):
for i, row in enumerate(data): # xyxy, track_id if tracking, conf, class_id
box = {'x1': row[0] / w, 'y1': row[1] / h, 'x2': row[2] / w, 'y2': row[3] / h}
conf = row[4]
id = int(row[5])
name = self.names[id]
result = {'name': name, 'class': id, 'confidence': conf, 'box': box}
conf = row[-2]
class_id = int(row[-1])
name = self.names[class_id]
result = {'name': name, 'class': class_id, 'confidence': conf, 'box': box}
if self.boxes.is_track:
result['track_id'] = int(row[-3]) # track ID
if self.masks:
x, y = self.masks.xy[i][:, 0], self.masks.xy[i][:, 1] # numpy array
result['segments'] = {'x': (x / w).tolist(), 'y': (y / h).tolist()}
@ -404,7 +406,7 @@ class Boxes(BaseTensor):
if boxes.ndim == 1:
boxes = boxes[None, :]
n = boxes.shape[-1]
assert n in (6, 7), f'expected `n` in [6, 7], but got {n}' # xyxy, (track_id), conf, cls
assert n in (6, 7), f'expected `n` in [6, 7], but got {n}' # xyxy, track_id, conf, cls
super().__init__(boxes, orig_shape)
self.is_track = n == 7
self.orig_shape = orig_shape

@ -357,14 +357,15 @@ def scale_image(masks, im0_shape, ratio_pad=None):
def xyxy2xywh(x):
"""
Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format.
Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format where (x1, y1) is the
top-left corner and (x2, y2) is the bottom-right corner.
Args:
x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format.
Returns:
y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format.
y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format.
"""
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)
y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center
y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center
y[..., 2] = x[..., 2] - x[..., 0] # width
@ -382,11 +383,13 @@ def xywh2xyxy(x):
Returns:
y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
"""
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x
y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y
y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x
y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)
dw = x[..., 2] / 2 # half-width
dh = x[..., 3] / 2 # half-height
y[..., 0] = x[..., 0] - dw # top left x
y[..., 1] = x[..., 1] - dh # top left y
y[..., 2] = x[..., 0] + dw # bottom right x
y[..., 3] = x[..., 1] + dh # bottom right y
return y
@ -404,7 +407,7 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where
x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box.
"""
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)
y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x
y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y
y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw # bottom right x
@ -428,7 +431,7 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
"""
if clip:
clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)
y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w # x center
y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h # y center
y[..., 2] = (x[..., 2] - x[..., 0]) / w # width
@ -449,7 +452,7 @@ def xyn2xy(x, w=640, h=640, padw=0, padh=0):
Returns:
y (np.ndarray | torch.Tensor): The x and y coordinates of the top left corner of the bounding box
"""
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)
y[..., 0] = w * x[..., 0] + padw # top left x
y[..., 1] = h * x[..., 1] + padh # top left y
return y
@ -464,7 +467,7 @@ def xywh2ltwh(x):
Returns:
y (np.ndarray | torch.Tensor): The bounding box coordinates in the xyltwh format
"""
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
return y
@ -479,7 +482,7 @@ def xyxy2ltwh(x):
Returns:
y (np.ndarray | torch.Tensor): The bounding box coordinates in the xyltwh format.
"""
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)
y[:, 2] = x[:, 2] - x[:, 0] # width
y[:, 3] = x[:, 3] - x[:, 1] # height
return y
@ -492,12 +495,91 @@ def ltwh2xywh(x):
Args:
x (torch.Tensor): the input tensor
"""
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)
y[:, 0] = x[:, 0] + x[:, 2] / 2 # center x
y[:, 1] = x[:, 1] + x[:, 3] / 2 # center y
return y
def xyxyxyxy2xywhr(corners):
    """
    Turn a batch of four-corner oriented boxes into centre/size/angle form.

    The centre is the midpoint of corners 1 and 3, the width is the length of edge 1->2, the height is the
    length of edge 2->3, and the rotation is the angle of edge 1->2 (with the y-difference negated), in degrees.

    Args:
        corners (numpy.ndarray | torch.Tensor): Corner coordinates laid out as [x1, y1, x2, y2, x3, y3, x4, y4],
            shape (n, 8).

    Returns:
        (numpy.ndarray | torch.Tensor): Boxes as [cx, cy, w, h, rotation] with rotation in degrees, shape (n, 5).
    """
    use_torch = isinstance(corners, torch.Tensor)
    # Select the backend matching the input type so tensors stay tensors and arrays stay arrays.
    arctan2, root = (torch.atan2, torch.sqrt) if use_torch else (np.arctan2, np.sqrt)

    # The fourth corner is unpacked only to enforce the 8-column layout; it is not needed for the result.
    x1, y1, x2, y2, x3, y3, _x4, _y4 = corners.T

    edge_dx, edge_dy = x2 - x1, y2 - y1  # vector of the first edge (corner 1 -> corner 2)
    width = root(edge_dx ** 2 + edge_dy ** 2)
    height = root((x2 - x3) ** 2 + (y2 - y3) ** 2)
    center_x = (x1 + x3) / 2
    center_y = (y1 + y3) / 2

    angle = arctan2(-edge_dy, edge_dx) * (180.0 / math.pi)  # radians to degrees

    fields = (center_x, center_y, width, height, angle)
    if use_torch:
        return torch.stack(fields, dim=1)
    return np.vstack(fields).T
def xywhr2xyxyxyxy(center):
    """
    Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4].

    The input is never modified: the degree-to-radian conversion is done on a new array/tensor rather than
    in place on the caller's rotation column.

    Args:
        center (numpy.ndarray | torch.Tensor): Input data in [cx, cy, w, h, rotation] format of shape (n, 5),
            with rotation given in degrees.

    Returns:
        (numpy.ndarray | torch.Tensor): Converted corner points [x1, y1, x2, y2, x3, y3, x4, y4] of shape (n, 8).
    """
    if isinstance(center, torch.Tensor):
        is_numpy = False
        cos = torch.cos
        sin = torch.sin
    else:
        is_numpy = True
        cos = np.cos
        sin = np.sin

    cx, cy, w, h, rotation = center.T
    # The unpacked columns are views into `center`; an in-place `*=` here would overwrite the caller's
    # degrees with radians (and fail on integer inputs), so multiply out of place.
    rotation = rotation * (math.pi / 180.0)  # degrees to radians

    dx = w / 2  # half-width
    dy = h / 2  # half-height

    cos_rot = cos(rotation)
    sin_rot = sin(rotation)
    dx_cos_rot = dx * cos_rot
    dx_sin_rot = dx * sin_rot
    dy_cos_rot = dy * cos_rot
    dy_sin_rot = dy * sin_rot

    # Each corner is the centre offset by +/- the rotated half-width and half-height vectors.
    x1 = cx - dx_cos_rot - dy_sin_rot
    y1 = cy + dx_sin_rot - dy_cos_rot
    x2 = cx + dx_cos_rot - dy_sin_rot
    y2 = cy - dx_sin_rot - dy_cos_rot
    x3 = cx + dx_cos_rot + dy_sin_rot
    y3 = cy - dx_sin_rot + dy_cos_rot
    x4 = cx - dx_cos_rot + dy_sin_rot
    y4 = cy + dx_sin_rot + dy_cos_rot

    points = (x1, y1, x2, y2, x3, y3, x4, y4)
    return np.vstack(points).T if is_numpy else torch.stack(points, dim=1)
def ltwh2xyxy(x):
"""
It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
@ -508,7 +590,7 @@ def ltwh2xyxy(x):
Returns:
y (np.ndarray | torch.Tensor): the xyxy coordinates of the bounding boxes.
"""
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)
y[:, 2] = x[:, 2] + x[:, 0] # width
y[:, 3] = x[:, 3] + x[:, 1] # height
return y

Loading…
Cancel
Save