From 29ace4982dcd1d5c3c18e47e7d005e62279d9a1e Mon Sep 17 00:00:00 2001 From: Laughing <61612323+Laughing-q@users.noreply.github.com> Date: Thu, 10 Aug 2023 06:15:14 +0800 Subject: [PATCH 1/3] Add "assert len(labels)" catch (#4257) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher --- ultralytics/data/dataset.py | 1 + ultralytics/data/utils.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py index 575243f38e..1d13261b0e 100644 --- a/ultralytics/data/dataset.py +++ b/ultralytics/data/dataset.py @@ -126,6 +126,7 @@ class YOLODataset(BaseDataset): # Read cache [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items labels = cache['labels'] + assert len(labels), f'No valid labels found, please check your dataset. {HELP_URL}' self.im_files = [lb['im_file'] for lb in labels] # update im_files # Check if the dataset is all boxes or all segments diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py index 561b994248..807ddad95b 100644 --- a/ultralytics/data/utils.py +++ b/ultralytics/data/utils.py @@ -24,7 +24,7 @@ from ultralytics.utils.checks import check_file, check_font, is_ascii from ultralytics.utils.downloads import download, safe_download, unzip_file from ultralytics.utils.ops import segments2boxes -HELP_URL = 'See https://docs.ultralytics.com/yolov5/tutorials/train_custom_data' +HELP_URL = 'See https://docs.ultralytics.com/datasets/detect for YOLO dataset format help.' IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm' # image suffixes VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv', 'webm' # video suffixes PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders From a76af55533ce63cb5717ace6e4648b5f3b52a001 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20L=C3=A9vesque?= Date: Wed, 9 Aug 2023 18:19:28 -0400 Subject: [PATCH 2/3] Fixes Results.tojson when tracking (#4270) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher --- ultralytics/engine/results.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ultralytics/engine/results.py b/ultralytics/engine/results.py index 60b57bf934..fd0d48b7c9 100644 --- a/ultralytics/engine/results.py +++ b/ultralytics/engine/results.py @@ -354,12 +354,14 @@ class Results(SimpleClass): results = [] data = self.boxes.data.cpu().tolist() h, w = self.orig_shape if normalize else (1, 1) - for i, row in enumerate(data): + for i, row in enumerate(data): # xyxy, track_id if tracking, conf, class_id box = {'x1': row[0] / w, 'y1': row[1] / h, 'x2': row[2] / w, 'y2': row[3] / h} - conf = row[4] - id = int(row[5]) - name = self.names[id] - result = {'name': name, 'class': id, 'confidence': conf, 'box': box} + conf = row[-2] + class_id = int(row[-1]) + name = self.names[class_id] + result = {'name': name, 'class': class_id, 'confidence': conf, 'box': box} + if self.boxes.is_track: + result['track_id'] = int(row[-3]) # track ID if self.masks: x, y = self.masks.xy[i][:, 0], self.masks.xy[i][:, 1] # numpy array result['segments'] = {'x': (x / w).tolist(), 'y': (y / h).tolist()} @@ -404,7 +406,7 @@ class Boxes(BaseTensor): if boxes.ndim == 1: boxes = boxes[None, :] n = boxes.shape[-1] - assert n in (6, 7), f'expected `n` in [6, 7], but got {n}' # xyxy, (track_id), conf, cls + assert n in 
(6, 7), f'expected `n` in [6, 7], but got {n}' # xyxy, track_id, conf, cls super().__init__(boxes, orig_shape) self.is_track = n == 7 self.orig_shape = orig_shape From c9be1f3cce89778f79fb462797b8ca0300e3813d Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 10 Aug 2023 00:55:36 +0200 Subject: [PATCH 3/3] `ultralytics 8.0.151` add `DOTAv2.yaml` for OBB training (#4258) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Kayzwer <68285002+Kayzwer@users.noreply.github.com> --- docs/datasets/classify/caltech101.md | 30 +++--- docs/datasets/classify/caltech256.md | 20 ++-- docs/datasets/classify/cifar10.md | 22 ++-- docs/datasets/classify/cifar100.md | 22 ++-- docs/datasets/classify/fashion-mnist.md | 2 +- docs/datasets/classify/imagenet.md | 28 ++--- docs/datasets/classify/imagenet10.md | 28 ++--- docs/datasets/classify/imagenette.md | 6 +- docs/datasets/classify/imagewoof.md | 4 +- docs/datasets/classify/index.md | 2 +- docs/datasets/classify/mnist.md | 24 +++-- docs/datasets/detect/argoverse.md | 25 +++-- docs/datasets/detect/coco.md | 26 +++-- docs/datasets/detect/coco8.md | 26 +++-- docs/datasets/detect/globalwheat2020.md | 22 ++-- docs/datasets/detect/index.md | 2 +- docs/datasets/detect/objects365.md | 24 +++-- docs/datasets/detect/open-images-v7.md | 22 ++-- docs/datasets/detect/sku-110k.md | 22 ++-- docs/datasets/detect/visdrone.md | 28 ++--- docs/datasets/detect/voc.md | 26 +++-- docs/datasets/detect/xview.md | 26 +++-- docs/datasets/obb/dota-v2.md | 129 ++++++++++++++++++++++++ docs/datasets/obb/index.md | 80 +++++++++++++++ docs/datasets/pose/coco.md | 26 +++-- docs/datasets/pose/coco8-pose.md | 26 +++-- docs/datasets/pose/index.md | 2 +- docs/datasets/segment/coco.md | 26 +++-- docs/datasets/segment/coco8-seg.md | 26 +++-- docs/datasets/segment/index.md | 2 +- docs/guides/kfold-cross-validation.md | 2 +- docs/models/yolov3.md | 2 +- docs/modes/train.md | 8 +- docs/reference/data/converter.md | 8 +- docs/reference/utils/ops.md | 8 ++ docs/tasks/classify.md | 2 +- docs/tasks/detect.md | 2 +- docs/tasks/pose.md | 2 +- docs/tasks/segment.md | 2 +- docs/usage/python.md | 6 +- mkdocs.yml | 3 + ultralytics/__init__.py | 2 +- ultralytics/cfg/datasets/DOTAv2.yaml | 37 +++++++ ultralytics/data/converter.py | 112 ++++++++++++++++---- ultralytics/data/utils.py | 46 +++++---- ultralytics/utils/ops.py | 112 +++++++++++++++++--- 46 files changed, 805 insertions(+), 303 deletions(-) create mode 100644 docs/datasets/obb/dota-v2.md create mode 100644 docs/datasets/obb/index.md create mode 100644 ultralytics/cfg/datasets/DOTAv2.yaml diff --git a/docs/datasets/classify/caltech101.md b/docs/datasets/classify/caltech101.md index 765c1009bc..bc7e91169e 100644 --- a/docs/datasets/classify/caltech101.md +++ b/docs/datasets/classify/caltech101.md @@ -39,7 +39,7 @@ To train a YOLO model on the Caltech-101 dataset for 100 epochs, you can use the model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='caltech101', epochs=100, imgsz=416) + results = model.train(data='caltech101', epochs=100, imgsz=416) ``` === "CLI" @@ -61,17 +61,21 @@ The example showcases the variety and complexity of the objects in the Caltech-1 If you use the Caltech-101 dataset in your research or development work, please cite the following paper: -```bibtex -@article{fei2007learning, - title={Learning generative visual models from few training examples: An incremental Bayesian approach tested on 101 object 
categories}, - author={Fei-Fei, Li and Fergus, Rob and Perona, Pietro}, - journal={Computer vision and Image understanding}, - volume={106}, - number={1}, - pages={59--70}, - year={2007}, - publisher={Elsevier} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @article{fei2007learning, + title={Learning generative visual models from few training examples: An incremental Bayesian approach tested on 101 object categories}, + author={Fei-Fei, Li and Fergus, Rob and Perona, Pietro}, + journal={Computer vision and Image understanding}, + volume={106}, + number={1}, + pages={59--70}, + year={2007}, + publisher={Elsevier} + } + ``` We would like to acknowledge Li Fei-Fei, Rob Fergus, and Pietro Perona for creating and maintaining the Caltech-101 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the Caltech-101 dataset and its creators, visit the [Caltech-101 dataset website](https://data.caltech.edu/records/mzrjq-6wc02). diff --git a/docs/datasets/classify/caltech256.md b/docs/datasets/classify/caltech256.md index 5830e0b6c5..664167dd8b 100644 --- a/docs/datasets/classify/caltech256.md +++ b/docs/datasets/classify/caltech256.md @@ -39,7 +39,7 @@ To train a YOLO model on the Caltech-256 dataset for 100 epochs, you can use the model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='caltech256', epochs=100, imgsz=416) + results = model.train(data='caltech256', epochs=100, imgsz=416) ``` === "CLI" @@ -61,13 +61,17 @@ The example showcases the diversity and complexity of the objects in the Caltech If you use the Caltech-256 dataset in your research or development work, please cite the following paper: -```bibtex -@article{griffin2007caltech, - title={Caltech-256 object category dataset}, - author={Griffin, Gregory and Holub, Alex and Perona, Pietro}, - year={2007} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @article{griffin2007caltech, + title={Caltech-256 object category dataset}, + author={Griffin, Gregory and Holub, Alex and Perona, Pietro}, + year={2007} + } + ``` We would like to acknowledge Gregory Griffin, Alex Holub, and Pietro Perona for creating and maintaining the Caltech-256 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the diff --git a/docs/datasets/classify/cifar10.md b/docs/datasets/classify/cifar10.md index 4292564236..100c2386de 100644 --- a/docs/datasets/classify/cifar10.md +++ b/docs/datasets/classify/cifar10.md @@ -42,7 +42,7 @@ To train a YOLO model on the CIFAR-10 dataset for 100 epochs with an image size model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='cifar10', epochs=100, imgsz=32) + results = model.train(data='cifar10', epochs=100, imgsz=32) ``` === "CLI" @@ -64,13 +64,17 @@ The example showcases the variety and complexity of the objects in the CIFAR-10 If you use the CIFAR-10 dataset in your research or development work, please cite the following paper: -```bibtex -@TECHREPORT{Krizhevsky09learningmultiple, - author={Alex Krizhevsky}, - title={Learning multiple layers of features from tiny images}, - institution={}, - year={2009} -} -``` +!!! 
note "" + + === "BibTeX" + + ```bibtex + @TECHREPORT{Krizhevsky09learningmultiple, + author={Alex Krizhevsky}, + title={Learning multiple layers of features from tiny images}, + institution={}, + year={2009} + } + ``` We would like to acknowledge Alex Krizhevsky for creating and maintaining the CIFAR-10 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the CIFAR-10 dataset and its creator, visit the [CIFAR-10 dataset website](https://www.cs.toronto.edu/~kriz/cifar.html). diff --git a/docs/datasets/classify/cifar100.md b/docs/datasets/classify/cifar100.md index c5c9b7d2c7..9f7f407879 100644 --- a/docs/datasets/classify/cifar100.md +++ b/docs/datasets/classify/cifar100.md @@ -42,7 +42,7 @@ To train a YOLO model on the CIFAR-100 dataset for 100 epochs with an image size model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='cifar100', epochs=100, imgsz=32) + results = model.train(data='cifar100', epochs=100, imgsz=32) ``` === "CLI" @@ -64,13 +64,17 @@ The example showcases the variety and complexity of the objects in the CIFAR-100 If you use the CIFAR-100 dataset in your research or development work, please cite the following paper: -```bibtex -@TECHREPORT{Krizhevsky09learningmultiple, - author={Alex Krizhevsky}, - title={Learning multiple layers of features from tiny images}, - institution={}, - year={2009} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @TECHREPORT{Krizhevsky09learningmultiple, + author={Alex Krizhevsky}, + title={Learning multiple layers of features from tiny images}, + institution={}, + year={2009} + } + ``` We would like to acknowledge Alex Krizhevsky for creating and maintaining the CIFAR-100 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the CIFAR-100 dataset and its creator, visit the [CIFAR-100 dataset website](https://www.cs.toronto.edu/~kriz/cifar.html). 
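A recurring change across these documentation patches is capturing the return value of `model.train(...)` as `results` instead of discarding it. As a rough sketch of why that is useful — assuming, as in recent `ultralytics` releases, that `train()` returns the metrics from the final validation pass — the captured object can be inspected directly after training:

```python
from ultralytics import YOLO

# Load a pretrained classification model (any of the datasets above works the same way)
model = YOLO('yolov8n-cls.pt')

# Keep the return value instead of discarding it
results = model.train(data='cifar100', epochs=100, imgsz=32)

# Inspect the metrics produced by the final validation pass; the exact attributes
# depend on the task and the ultralytics version, so treat this as illustrative
print(results)
print(model.metrics)  # the same metrics object is also cached on the model
```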
diff --git a/docs/datasets/classify/fashion-mnist.md b/docs/datasets/classify/fashion-mnist.md index 876bed2fad..319b2210d9 100644 --- a/docs/datasets/classify/fashion-mnist.md +++ b/docs/datasets/classify/fashion-mnist.md @@ -56,7 +56,7 @@ To train a CNN model on the Fashion-MNIST dataset for 100 epochs with an image s model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='fashion-mnist', epochs=100, imgsz=28) + results = model.train(data='fashion-mnist', epochs=100, imgsz=28) ``` === "CLI" diff --git a/docs/datasets/classify/imagenet.md b/docs/datasets/classify/imagenet.md index ba7da29434..008a3ff196 100644 --- a/docs/datasets/classify/imagenet.md +++ b/docs/datasets/classify/imagenet.md @@ -42,7 +42,7 @@ To train a deep learning model on the ImageNet dataset for 100 epochs with an im model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='imagenet', epochs=100, imgsz=224) + results = model.train(data='imagenet', epochs=100, imgsz=224) ``` === "CLI" @@ -64,16 +64,20 @@ The example showcases the variety and complexity of the images in the ImageNet d If you use the ImageNet dataset in your research or development work, please cite the following paper: -```bibtex -@article{ILSVRC15, - author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei}, - title={ImageNet Large Scale Visual Recognition Challenge}, - year={2015}, - journal={International Journal of Computer Vision (IJCV)}, - volume={115}, - number={3}, - pages={211-252} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @article{ILSVRC15, + author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei}, + title={ImageNet Large Scale Visual Recognition Challenge}, + year={2015}, + journal={International Journal of Computer Vision (IJCV)}, + volume={115}, + number={3}, + pages={211-252} + } + ``` We would like to acknowledge the ImageNet team, led by Olga Russakovsky, Jia Deng, and Li Fei-Fei, for creating and maintaining the ImageNet dataset as a valuable resource for the machine learning and computer vision research community. For more information about the ImageNet dataset and its creators, visit the [ImageNet website](https://www.image-net.org/). 
diff --git a/docs/datasets/classify/imagenet10.md b/docs/datasets/classify/imagenet10.md index 3520f4c6ce..886d5afe9c 100644 --- a/docs/datasets/classify/imagenet10.md +++ b/docs/datasets/classify/imagenet10.md @@ -38,7 +38,7 @@ To test a deep learning model on the ImageNet10 dataset with an image size of 22 model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='imagenet10', epochs=5, imgsz=224) + results = model.train(data='imagenet10', epochs=5, imgsz=224) ``` === "CLI" @@ -59,16 +59,20 @@ The example showcases the variety and complexity of the images in the ImageNet10 If you use the ImageNet10 dataset in your research or development work, please cite the original ImageNet paper: -```bibtex -@article{ILSVRC15, - author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei}, - title={ImageNet Large Scale Visual Recognition Challenge}, - year={2015}, - journal={International Journal of Computer Vision (IJCV)}, - volume={115}, - number={3}, - pages={211-252} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @article{ILSVRC15, + author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei}, + title={ImageNet Large Scale Visual Recognition Challenge}, + year={2015}, + journal={International Journal of Computer Vision (IJCV)}, + volume={115}, + number={3}, + pages={211-252} + } + ``` We would like to acknowledge the ImageNet team, led by Olga Russakovsky, Jia Deng, and Li Fei-Fei, for creating and maintaining the ImageNet dataset. The ImageNet10 dataset, while a compact subset, is a valuable resource for quick testing and debugging in the machine learning and computer vision research community. For more information about the ImageNet dataset and its creators, visit the [ImageNet website](https://www.image-net.org/). 
diff --git a/docs/datasets/classify/imagenette.md b/docs/datasets/classify/imagenette.md index 968e279c30..cc5cb27042 100644 --- a/docs/datasets/classify/imagenette.md +++ b/docs/datasets/classify/imagenette.md @@ -40,7 +40,7 @@ To train a model on the ImageNette dataset for 100 epochs with a standard image model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='imagenette', epochs=100, imgsz=224) + results = model.train(data='imagenette', epochs=100, imgsz=224) ``` === "CLI" @@ -75,7 +75,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model with ImageNette160 - model.train(data='imagenette160', epochs=100, imgsz=160) + results = model.train(data='imagenette160', epochs=100, imgsz=160) ``` === "CLI" @@ -96,7 +96,7 @@ To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imag model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model with ImageNette320 - model.train(data='imagenette320', epochs=100, imgsz=320) + results = model.train(data='imagenette320', epochs=100, imgsz=320) ``` === "CLI" diff --git a/docs/datasets/classify/imagewoof.md b/docs/datasets/classify/imagewoof.md index 86e51f76d8..2c1271f444 100644 --- a/docs/datasets/classify/imagewoof.md +++ b/docs/datasets/classify/imagewoof.md @@ -37,7 +37,7 @@ To train a CNN model on the ImageWoof dataset for 100 epochs with an image size model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='imagewoof', epochs=100, imgsz=224) + results = model.train(data='imagewoof', epochs=100, imgsz=224) ``` === "CLI" @@ -79,6 +79,6 @@ The example showcases the subtle differences and similarities among the differen ## Citations and Acknowledgments -If you use the ImageWoof dataset in your research or development work, please make sure to acknowledge the creators of the dataset by linking to the [official dataset repository](https://github.com/fastai/imagenette). As of my knowledge cutoff in September 2021, there is no official publication specifically about ImageWoof for citation. +If you use the ImageWoof dataset in your research or development work, please make sure to acknowledge the creators of the dataset by linking to the [official dataset repository](https://github.com/fastai/imagenette). We would like to acknowledge the FastAI team for creating and maintaining the ImageWoof dataset as a valuable resource for the machine learning and computer vision research community. For more information about the ImageWoof dataset, visit the [ImageWoof dataset repository](https://github.com/fastai/imagenette). 
diff --git a/docs/datasets/classify/index.md b/docs/datasets/classify/index.md index 220d3aa9c0..748701e6ee 100644 --- a/docs/datasets/classify/index.md +++ b/docs/datasets/classify/index.md @@ -91,7 +91,7 @@ In this example, the `train` directory contains subdirectories for each class in model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='path/to/dataset', epochs=100, imgsz=640) + results = model.train(data='path/to/dataset', epochs=100, imgsz=640) ``` === "CLI" diff --git a/docs/datasets/classify/mnist.md b/docs/datasets/classify/mnist.md index 9dac46b4ae..40f6a779ae 100644 --- a/docs/datasets/classify/mnist.md +++ b/docs/datasets/classify/mnist.md @@ -45,7 +45,7 @@ To train a CNN model on the MNIST dataset for 100 epochs with an image size of 3 model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='mnist', epochs=100, imgsz=32) + results = model.train(data='mnist', epochs=100, imgsz=32) ``` === "CLI" @@ -69,14 +69,18 @@ If you use the MNIST dataset in your research or development work, please cite the following paper: -```bibtex -@article{lecun2010mnist, - title={MNIST handwritten digit database}, - author={LeCun, Yann and Cortes, Corinna and Burges, CJ}, - journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist}, - volume={2}, - year={2010} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @article{lecun2010mnist, + title={MNIST handwritten digit database}, + author={LeCun, Yann and Cortes, Corinna and Burges, CJ}, + journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist}, + volume={2}, + year={2010} + } + ``` We would like to acknowledge Yann LeCun, Corinna Cortes, and Christopher J.C. Burges for creating and maintaining the MNIST dataset as a valuable resource for the machine learning and computer vision research community. For more information about the MNIST dataset and its creators, visit the [MNIST dataset website](http://yann.lecun.com/exdb/mnist/). diff --git a/docs/datasets/detect/argoverse.md b/docs/datasets/detect/argoverse.md index 0836fa4314..cbc2fd2411 100644 --- a/docs/datasets/detect/argoverse.md +++ b/docs/datasets/detect/argoverse.md @@ -12,7 +12,6 @@ The [Argoverse](https://www.argoverse.org/) dataset is a collection of data desi The Argoverse dataset *.zip file required for training was removed from Amazon S3 after the shutdown of Argo AI by Ford, but we have made it available for manual download on [Google Drive](https://drive.google.com/file/d/1st9qW3BeIwQsnR0t8mRpvbsSWIo16ACi/view?usp=drive_link). - ## Key Features - Argoverse contains over 290K labeled 3D object tracks and 5 million object instances across 1,263 distinct scenes. 
@@ -57,7 +56,7 @@ To train a YOLOv8n model on the Argoverse dataset for 100 epochs with an image s model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='Argoverse.yaml', epochs=100, imgsz=640) + results = model.train(data='Argoverse.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -81,14 +80,18 @@ The example showcases the variety and complexity of the data in the Argoverse da If you use the Argoverse dataset in your research or development work, please cite the following paper: -```bibtex -@inproceedings{chang2019argoverse, - title={Argoverse: 3D Tracking and Forecasting with Rich Maps}, - author={Chang, Ming-Fang and Lambert, John and Sangkloy, Patsorn and Singh, Jagjeet and Bak, Slawomir and Hartnett, Andrew and Wang, Dequan and Carr, Peter and Lucey, Simon and Ramanan, Deva and others}, - booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, - pages={8748--8757}, - year={2019} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @inproceedings{chang2019argoverse, + title={Argoverse: 3D Tracking and Forecasting with Rich Maps}, + author={Chang, Ming-Fang and Lambert, John and Sangkloy, Patsorn and Singh, Jagjeet and Bak, Slawomir and Hartnett, Andrew and Wang, Dequan and Carr, Peter and Lucey, Simon and Ramanan, Deva and others}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={8748--8757}, + year={2019} + } + ``` We would like to acknowledge Argo AI for creating and maintaining the Argoverse dataset as a valuable resource for the autonomous driving research community. For more information about the Argoverse dataset and its creators, visit the [Argoverse dataset website](https://www.argoverse.org/). diff --git a/docs/datasets/detect/coco.md b/docs/datasets/detect/coco.md index b8d1934651..903037040b 100644 --- a/docs/datasets/detect/coco.md +++ b/docs/datasets/detect/coco.md @@ -52,7 +52,7 @@ To train a YOLOv8n model on the COCO dataset for 100 epochs with an image size o model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='coco.yaml', epochs=100, imgsz=640) + results = model.train(data='coco.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -76,15 +76,19 @@ The example showcases the variety and complexity of the images in the COCO datas If you use the COCO dataset in your research or development work, please cite the following paper: -```bibtex -@misc{lin2015microsoft, - title={Microsoft COCO: Common Objects in Context}, - author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár}, - year={2015}, - eprint={1405.0312}, - archivePrefix={arXiv}, - primaryClass={cs.CV} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @misc{lin2015microsoft, + title={Microsoft COCO: Common Objects in Context}, + author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár}, + year={2015}, + eprint={1405.0312}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home). 
diff --git a/docs/datasets/detect/coco8.md b/docs/datasets/detect/coco8.md index 7fc71fb251..c365b3bd4e 100644 --- a/docs/datasets/detect/coco8.md +++ b/docs/datasets/detect/coco8.md @@ -42,7 +42,7 @@ To train a YOLOv8n model on the COCO8 dataset for 100 epochs with an image size model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='coco8.yaml', epochs=100, imgsz=640) + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -66,15 +66,19 @@ The example showcases the variety and complexity of the images in the COCO8 data If you use the COCO dataset in your research or development work, please cite the following paper: -```bibtex -@misc{lin2015microsoft, - title={Microsoft COCO: Common Objects in Context}, - author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár}, - year={2015}, - eprint={1405.0312}, - archivePrefix={arXiv}, - primaryClass={cs.CV} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @misc{lin2015microsoft, + title={Microsoft COCO: Common Objects in Context}, + author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár}, + year={2015}, + eprint={1405.0312}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home). diff --git a/docs/datasets/detect/globalwheat2020.md b/docs/datasets/detect/globalwheat2020.md index 3a60aa0b57..0f0d7c0a80 100644 --- a/docs/datasets/detect/globalwheat2020.md +++ b/docs/datasets/detect/globalwheat2020.md @@ -51,7 +51,7 @@ To train a YOLOv8n model on the Global Wheat Head Dataset for 100 epochs with an model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='GlobalWheat2020.yaml', epochs=100, imgsz=640) + results = model.train(data='GlobalWheat2020.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -75,13 +75,17 @@ The example showcases the variety and complexity of the data in the Global Wheat If you use the Global Wheat Head Dataset in your research or development work, please cite the following paper: -```bibtex -@article{david2020global, - title={Global Wheat Head Detection (GWHD) Dataset: A Large and Diverse Dataset of High-Resolution RGB-Labelled Images to Develop and Benchmark Wheat Head Detection Methods}, - author={David, Etienne and Madec, Simon and Sadeghi-Tehran, Pouria and Aasen, Helge and Zheng, Bangyou and Liu, Shouyang and Kirchgessner, Norbert and Ishikawa, Goro and Nagasawa, Koichi and Badhon, Minhajul and others}, - journal={arXiv preprint arXiv:2005.02162}, - year={2020} -} -``` +!!! 
note "" + + === "BibTeX" + + ```bibtex + @article{david2020global, + title={Global Wheat Head Detection (GWHD) Dataset: A Large and Diverse Dataset of High-Resolution RGB-Labelled Images to Develop and Benchmark Wheat Head Detection Methods}, + author={David, Etienne and Madec, Simon and Sadeghi-Tehran, Pouria and Aasen, Helge and Zheng, Bangyou and Liu, Shouyang and Kirchgessner, Norbert and Ishikawa, Goro and Nagasawa, Koichi and Badhon, Minhajul and others}, + journal={arXiv preprint arXiv:2005.02162}, + year={2020} + } + ``` We would like to acknowledge the researchers and institutions that contributed to the creation and maintenance of the Global Wheat Head Dataset as a valuable resource for the plant phenotyping and crop management research community. For more information about the dataset and its creators, visit the [Global Wheat Head Dataset website](http://www.global-wheat.com/). diff --git a/docs/datasets/detect/index.md b/docs/datasets/detect/index.md index b6dd1e1b17..f93e6b01af 100644 --- a/docs/datasets/detect/index.md +++ b/docs/datasets/detect/index.md @@ -59,7 +59,7 @@ Here's how you can use these formats to train your model: model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='coco8.yaml', epochs=100, imgsz=640) + results = model.train(data='coco8.yaml', epochs=100, imgsz=640) ``` === "CLI" diff --git a/docs/datasets/detect/objects365.md b/docs/datasets/detect/objects365.md index 0254b47cab..06301b1941 100644 --- a/docs/datasets/detect/objects365.md +++ b/docs/datasets/detect/objects365.md @@ -51,7 +51,7 @@ To train a YOLOv8n model on the Objects365 dataset for 100 epochs with an image model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='Objects365.yaml', epochs=100, imgsz=640) + results = model.train(data='Objects365.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -75,14 +75,18 @@ The example showcases the variety and complexity of the data in the Objects365 d If you use the Objects365 dataset in your research or development work, please cite the following paper: -```bibtex -@inproceedings{shao2019objects365, - title={Objects365: A Large-scale, High-quality Dataset for Object Detection}, - author={Shao, Shuai and Li, Zeming and Zhang, Tianyuan and Peng, Chao and Yu, Gang and Li, Jing and Zhang, Xiangyu and Sun, Jian}, - booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, - pages={8425--8434}, - year={2019} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @inproceedings{shao2019objects365, + title={Objects365: A Large-scale, High-quality Dataset for Object Detection}, + author={Shao, Shuai and Li, Zeming and Zhang, Tianyuan and Peng, Chao and Yu, Gang and Li, Jing and Zhang, Xiangyu and Sun, Jian}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={8425--8434}, + year={2019} + } + ``` We would like to acknowledge the team of researchers who created and maintain the Objects365 dataset as a valuable resource for the computer vision research community. For more information about the Objects365 dataset and its creators, visit the [Objects365 dataset website](https://www.objects365.org/). 
diff --git a/docs/datasets/detect/open-images-v7.md b/docs/datasets/detect/open-images-v7.md index 5e8dd17a3f..1d97a8dd34 100644 --- a/docs/datasets/detect/open-images-v7.md +++ b/docs/datasets/detect/open-images-v7.md @@ -70,7 +70,7 @@ To train a YOLOv8n model on the Open Images V7 dataset for 100 epochs with an im model = YOLO('yolov8n.pt') # Train the model on the Open Images V7 dataset - model.train(data='open-images-v7.yaml', epochs=100, imgsz=640) + results = model.train(data='open-images-v7.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -94,13 +94,17 @@ Researchers can gain invaluable insights into the array of computer vision chall For those employing Open Images V7 in their work, it's prudent to cite the relevant papers and acknowledge the creators: -```bibtex -@article{OpenImages, - author = {Alina Kuznetsova and Hassan Rom and Neil Alldrin and Jasper Uijlings and Ivan Krasin and Jordi Pont-Tuset and Shahab Kamali and Stefan Popov and Matteo Malloci and Alexander Kolesnikov and Tom Duerig and Vittorio Ferrari}, - title = {The Open Images Dataset V4: Unified image classification, object detection, and visual relationship detection at scale}, - year = {2020}, - journal = {IJCV} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @article{OpenImages, + author = {Alina Kuznetsova and Hassan Rom and Neil Alldrin and Jasper Uijlings and Ivan Krasin and Jordi Pont-Tuset and Shahab Kamali and Stefan Popov and Matteo Malloci and Alexander Kolesnikov and Tom Duerig and Vittorio Ferrari}, + title = {The Open Images Dataset V4: Unified image classification, object detection, and visual relationship detection at scale}, + year = {2020}, + journal = {IJCV} + } + ``` A heartfelt acknowledgment goes out to the Google AI team for creating and maintaining the Open Images V7 dataset. For a deep dive into the dataset and its offerings, navigate to the [official Open Images V7 website](https://storage.googleapis.com/openimages/web/index.html). diff --git a/docs/datasets/detect/sku-110k.md b/docs/datasets/detect/sku-110k.md index ac56f2b98d..07c35793b9 100644 --- a/docs/datasets/detect/sku-110k.md +++ b/docs/datasets/detect/sku-110k.md @@ -53,7 +53,7 @@ To train a YOLOv8n model on the SKU-110K dataset for 100 epochs with an image si model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='SKU-110K.yaml', epochs=100, imgsz=640) + results = model.train(data='SKU-110K.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -77,13 +77,17 @@ The example showcases the variety and complexity of the data in the SKU-110k dat If you use the SKU-110k dataset in your research or development work, please cite the following paper: -```bibtex -@inproceedings{goldman2019dense, - author = {Eran Goldman and Roei Herzig and Aviv Eisenschtat and Jacob Goldberger and Tal Hassner}, - title = {Precise Detection in Densely Packed Scenes}, - booktitle = {Proc. Conf. Comput. Vision Pattern Recognition (CVPR)}, - year = {2019} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @inproceedings{goldman2019dense, + author = {Eran Goldman and Roei Herzig and Aviv Eisenschtat and Jacob Goldberger and Tal Hassner}, + title = {Precise Detection in Densely Packed Scenes}, + booktitle = {Proc. Conf. Comput. Vision Pattern Recognition (CVPR)}, + year = {2019} + } + ``` We would like to acknowledge Eran Goldman et al. for creating and maintaining the SKU-110k dataset as a valuable resource for the computer vision research community. 
For more information about the SKU-110k dataset and its creators, visit the [SKU-110k dataset GitHub repository](https://github.com/eg4000/SKU110K_CVPR19). diff --git a/docs/datasets/detect/visdrone.md b/docs/datasets/detect/visdrone.md index 5468d572bc..be0c956e1d 100644 --- a/docs/datasets/detect/visdrone.md +++ b/docs/datasets/detect/visdrone.md @@ -49,7 +49,7 @@ To train a YOLOv8n model on the VisDrone dataset for 100 epochs with an image si model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='VisDrone.yaml', epochs=100, imgsz=640) + results = model.train(data='VisDrone.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -73,16 +73,20 @@ The example showcases the variety and complexity of the data in the VisDrone dat If you use the VisDrone dataset in your research or development work, please cite the following paper: -```bibtex -@ARTICLE{9573394, - author={Zhu, Pengfei and Wen, Longyin and Du, Dawei and Bian, Xiao and Fan, Heng and Hu, Qinghua and Ling, Haibin}, - journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, - title={Detection and Tracking Meet Drones Challenge}, - year={2021}, - volume={}, - number={}, - pages={1-1}, - doi={10.1109/TPAMI.2021.3119563}} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @ARTICLE{9573394, + author={Zhu, Pengfei and Wen, Longyin and Du, Dawei and Bian, Xiao and Fan, Heng and Hu, Qinghua and Ling, Haibin}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + title={Detection and Tracking Meet Drones Challenge}, + year={2021}, + volume={}, + number={}, + pages={1-1}, + doi={10.1109/TPAMI.2021.3119563}} + ``` We would like to acknowledge the AISKYEYE team at the Lab of Machine Learning and Data Mining, Tianjin University, China, for creating and maintaining the VisDrone dataset as a valuable resource for the drone-based computer vision research community. For more information about the VisDrone dataset and its creators, visit the [VisDrone Dataset GitHub repository](https://github.com/VisDrone/VisDrone-Dataset). diff --git a/docs/datasets/detect/voc.md b/docs/datasets/detect/voc.md index d29da7df4b..ec18af7ff3 100644 --- a/docs/datasets/detect/voc.md +++ b/docs/datasets/detect/voc.md @@ -52,7 +52,7 @@ To train a YOLOv8n model on the VOC dataset for 100 epochs with an image size of model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='VOC.yaml', epochs=100, imgsz=640) + results = model.train(data='VOC.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -77,15 +77,19 @@ The example showcases the variety and complexity of the images in the VOC datase If you use the VOC dataset in your research or development work, please cite the following paper: -```bibtex -@misc{everingham2010pascal, - title={The PASCAL Visual Object Classes (VOC) Challenge}, - author={Mark Everingham and Luc Van Gool and Christopher K. I. Williams and John Winn and Andrew Zisserman}, - year={2010}, - eprint={0909.5206}, - archivePrefix={arXiv}, - primaryClass={cs.CV} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @misc{everingham2010pascal, + title={The PASCAL Visual Object Classes (VOC) Challenge}, + author={Mark Everingham and Luc Van Gool and Christopher K. I. 
Williams and John Winn and Andrew Zisserman}, + year={2010}, + eprint={0909.5206}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` We would like to acknowledge the PASCAL VOC Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the VOC dataset and its creators, visit the [PASCAL VOC dataset website](http://host.robots.ox.ac.uk/pascal/VOC/). diff --git a/docs/datasets/detect/xview.md b/docs/datasets/detect/xview.md index 9da4ca87af..e47268d22e 100644 --- a/docs/datasets/detect/xview.md +++ b/docs/datasets/detect/xview.md @@ -55,7 +55,7 @@ To train a model on the xView dataset for 100 epochs with an image size of 640, model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='xView.yaml', epochs=100, imgsz=640) + results = model.train(data='xView.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -79,15 +79,19 @@ The example showcases the variety and complexity of the data in the xView datase If you use the xView dataset in your research or development work, please cite the following paper: -```bibtex -@misc{lam2018xview, - title={xView: Objects in Context in Overhead Imagery}, - author={Darius Lam and Richard Kuzma and Kevin McGee and Samuel Dooley and Michael Laielli and Matthew Klaric and Yaroslav Bulatov and Brendan McCord}, - year={2018}, - eprint={1802.07856}, - archivePrefix={arXiv}, - primaryClass={cs.CV} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @misc{lam2018xview, + title={xView: Objects in Context in Overhead Imagery}, + author={Darius Lam and Richard Kuzma and Kevin McGee and Samuel Dooley and Michael Laielli and Matthew Klaric and Yaroslav Bulatov and Brendan McCord}, + year={2018}, + eprint={1802.07856}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` We would like to acknowledge the [Defense Innovation Unit](https://www.diu.mil/) (DIU) and the creators of the xView dataset for their valuable contribution to the computer vision research community. For more information about the xView dataset and its creators, visit the [xView dataset website](http://xviewdataset.org/). diff --git a/docs/datasets/obb/dota-v2.md b/docs/datasets/obb/dota-v2.md new file mode 100644 index 0000000000..d50e014a4d --- /dev/null +++ b/docs/datasets/obb/dota-v2.md @@ -0,0 +1,129 @@ +--- +comments: true +description: Delve into DOTA v2, an Oriented Bounding Box (OBB) aerial imagery dataset with 1.7 million instances and 11,268 images. +keywords: DOTA v2, object detection, aerial images, computer vision, deep learning, annotations, oriented bounding boxes, OBB +--- + +# DOTA v2 Dataset with OBB + +[DOTA v2](https://captain-whu.github.io/DOTA/index.html) stands as a specialized dataset, emphasizing object detection in aerial images. Originating from the DOTA series of datasets, it offers annotated images capturing a diverse array of aerial scenes with Oriented Bounding Boxes (OBB). + +![DOTA v2 classes visual](https://user-images.githubusercontent.com/26833433/259461765-72fdd0d8-266b-44a9-8199-199329bf5ca9.jpg) + +## Key Features + +- Collection from various sensors and platforms, with image sizes ranging from 800 × 800 to 20,000 × 20,000 pixels. +- Features more than 1.7M Oriented Bounding Boxes across 18 categories. +- Encompasses multiscale object detection. +- Instances are annotated by experts using arbitrary (8 d.o.f.) quadrilateral, capturing objects of different scales, orientations, and shapes. 
+ +## Dataset Versions + +### DOTA-v1.0 + +- Contains 15 common categories. +- Comprises 2,806 images with 188,282 instances. +- Split ratios: 1/2 for training, 1/6 for validation, and 1/3 for testing. + +### DOTA-v1.5 + +- Incorporates the same images as DOTA-v1.0. +- Very small instances (less than 10 pixels) are also annotated. +- Addition of a new category: "container crane". +- A total of 403,318 instances. +- Released for the DOAI Challenge 2019 on Object Detection in Aerial Images. + +### DOTA-v2.0 + +- Collections from Google Earth, GF-2 Satellite, and other aerial images. +- Contains 18 common categories. +- Comprises 11,268 images with a whopping 1,793,658 instances. +- New categories introduced: "airport" and "helipad". +- Image splits: + - Training: 1,830 images with 268,627 instances. + - Validation: 593 images with 81,048 instances. + - Test-dev: 2,792 images with 353,346 instances. + - Test-challenge: 6,053 images with 1,090,637 instances. + +## Dataset Structure + +DOTA v2 exhibits a structured layout tailored for OBB object detection challenges: + +- **Images**: A vast collection of high-resolution aerial images capturing diverse terrains and structures. +- **Oriented Bounding Boxes**: Annotations in the form of rotated rectangles encapsulating objects irrespective of their orientation, ideal for capturing objects like airplanes, ships, and buildings. + +## Applications + +DOTA v2 serves as a benchmark for training and evaluating models specifically tailored for aerial image analysis. With the inclusion of OBB annotations, it provides a unique challenge, enabling the development of specialized object detection models that cater to aerial imagery's nuances. + +## Dataset YAML + +Typically, datasets incorporate a YAML (Yet Another Markup Language) file detailing the dataset's configuration. For DOTA v2, a hypothetical `DOTAv2.yaml` could be used. For accurate paths and configurations, it's vital to consult the dataset's official repository or documentation. + +!!! example "DOTAv2.yaml" + + ```yaml + --8<-- "ultralytics/cfg/datasets/DOTAv2.yaml" + ``` + +## Usage + +To train a model on the DOTA v2 dataset, you can utilize the following code snippets. Always refer to your model's documentation for a thorough list of available arguments. + +!!! warning + + Please note that all images and associated annotations in the DOTAv2 dataset can be used for academic purposes, but commercial use is prohibited. Your understanding and respect for the dataset creators' wishes are greatly appreciated! + +!!! example "Train Example" + + === "Python" + + ```python + from ultralytics import YOLO + + # Create a new YOLOv8n-OBB model from scratch + model = YOLO('yolov8n-obb.yaml') + + # Train the model on the DOTAv2 dataset + results = model.train(data='DOTAv2.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Train a new YOLOv8n-OBB model on the DOTAv2 dataset + yolo detect train data=DOTAv2.yaml model=yolov8n.pt epochs=100 imgsz=640 + ``` + +## Sample Data and Annotations + +Having a glance at the dataset illustrates its depth: + +![Dataset sample image](https://captain-whu.github.io/DOTA/images/instances-DOTA.jpg) + +- **DOTA v2**: This snapshot underlines the complexity of aerial scenes and the significance of Oriented Bounding Box annotations, capturing objects in their natural orientation. + +The dataset's richness offers invaluable insights into object detection challenges exclusive to aerial imagery. 
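The split figures quoted above for DOTA-v2.0 are internally consistent: a quick arithmetic check, using only the image and instance counts listed on this page, reproduces the stated totals of 11,268 images and 1,793,658 instances.

```python
# (images, instances) per DOTA-v2.0 split, as quoted above
splits = {
    'train': (1_830, 268_627),
    'val': (593, 81_048),
    'test-dev': (2_792, 353_346),
    'test-challenge': (6_053, 1_090_637),
}

total_images = sum(images for images, _ in splits.values())
total_instances = sum(instances for _, instances in splits.values())
print(total_images, total_instances)  # 11268 1793658
```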
+ +## Citations and Acknowledgments + +For those leveraging DOTA v2 in their endeavors, it's pertinent to cite the relevant research papers: + +!!! note "" + + === "BibTeX" + + ```bibtex + @article{9560031, + author={Ding, Jian and Xue, Nan and Xia, Gui-Song and Bai, Xiang and Yang, Wen and Yang, Michael and Belongie, Serge and Luo, Jiebo and Datcu, Mihai and Pelillo, Marcello and Zhang, Liangpei}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + title={Object Detection in Aerial Images: A Large-Scale Benchmark and Challenges}, + year={2021}, + volume={}, + number={}, + pages={1-1}, + doi={10.1109/TPAMI.2021.3117983} + } + ``` + +A special note of gratitude to the team behind DOTA v2 for their commendable effort in curating this dataset. For an exhaustive understanding of the dataset and its nuances, please visit the [official DOTA v2 website](https://captain-whu.github.io/DOTA/index.html). \ No newline at end of file diff --git a/docs/datasets/obb/index.md b/docs/datasets/obb/index.md new file mode 100644 index 0000000000..1e1f544011 --- /dev/null +++ b/docs/datasets/obb/index.md @@ -0,0 +1,80 @@ +--- +comments: true +description: Dive deep into various oriented bounding box (OBB) dataset formats compatible with the Ultralytics YOLO model. Grasp the nuances of using and converting datasets to this format. +keywords: Ultralytics, YOLO, oriented bounding boxes, OBB, dataset formats, label formats, DOTA v2, data conversion +--- + +# Oriented Bounding Box Datasets Overview + +Training a precise object detection model with oriented bounding boxes (OBB) requires a thorough dataset. This guide elucidates the various OBB dataset formats compatible with the Ultralytics YOLO model, offering insights into their structure, application, and methods for format conversions. + +## Supported OBB Dataset Formats + +### YOLO OBB Format + +The YOLO OBB format designates bounding boxes by their four corner points with coordinates normalized between 0 and 1. It follows this format: + +```bash +class_index, x1, y1, x2, y2, x3, y3, x4, y4 +``` + +Internally, YOLO processes losses and outputs in the `xywhr` format, which represents the bounding box's center point (xy), width, height, and rotation. + +
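To make the relationship between the eight normalized corner coordinates in the label files and the internal `xywhr` representation concrete, here is a rough geometric sketch. It is illustrative only — not the routine `ultralytics` uses internally (the library's own box-conversion helpers live in `ultralytics/utils/ops.py`, which this PR extends) — and it assumes the four corners trace the rectangle in order.

```python
import math

def corners_to_xywhr(pts):
    """Convert 4 ordered (x, y) corners of a rotated rectangle to (cx, cy, w, h, r)."""
    (x1, y1), (x2, y2), (x3, y3), (x4, y4) = pts
    cx = (x1 + x2 + x3 + x4) / 4      # center x: mean of the corner x-coordinates
    cy = (y1 + y2 + y3 + y4) / 4      # center y: mean of the corner y-coordinates
    w = math.hypot(x2 - x1, y2 - y1)  # width: length of the first edge
    h = math.hypot(x3 - x2, y3 - y2)  # height: length of the adjacent edge
    r = math.atan2(y2 - y1, x2 - x1)  # rotation: angle of the first edge, in radians
    return cx, cy, w, h, r

# Corners taken from the sample label line shown below
corners = [(0.780811, 0.743961), (0.782371, 0.74686),
           (0.777691, 0.752174), (0.776131, 0.749758)]
print(corners_to_xywhr(corners))
```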
+*(Image: a sample object annotated with an oriented bounding box.)*
+ +An example of a `*.txt` label file for the above image, which contains an object of class `0` in OBB format, could look like: + +```bash +0 0.780811 0.743961 0.782371 0.74686 0.777691 0.752174 0.776131 0.749758 +``` + +## Usage + +To train a model using these OBB formats: + +!!! example "" + + === "Python" + + ```python + from ultralytics import YOLO + + # Create a new YOLOv8n-OBB model from scratch + model = YOLO('yolov8n-obb.yaml') + + # Train the model on the DOTAv2 dataset + results = model.train(data='DOTAv2.yaml', epochs=100, imgsz=640) + ``` + + === "CLI" + + ```bash + # Train a new YOLOv8n-OBB model on the DOTAv2 dataset + yolo detect train data=DOTAv2.yaml model=yolov8n.pt epochs=100 imgsz=640 + ``` + +## Supported Datasets + +Currently, the following datasets with Oriented Bounding Boxes are supported: + +- [**DOTA v2**](./dota-v2.md): DOTA (A Large-scale Dataset for Object Detection in Aerial Images) version 2, emphasizes detection from aerial perspectives and contains oriented bounding boxes with 1.7 million instances and 11,268 images. + +### Incorporating your own OBB dataset + +For those looking to introduce their own datasets with oriented bounding boxes, ensure compatibility with the "YOLO OBB format" mentioned above. Convert your annotations to this required format and detail the paths, classes, and class names in a corresponding YAML configuration file. + +## Convert Label Formats + +### DOTA Dataset Format to YOLO OBB Format + +Transitioning labels from the DOTA dataset format to the YOLO OBB format can be achieved with this script: + +```python +from ultralytics.data.converter import convert_dota_to_yolo_obb + +convert_dota_to_yolo_obb('path/to/DOTA') +``` + +This conversion mechanism is instrumental for datasets in the DOTA format, ensuring alignment with the Ultralytics YOLO OBB format. + +It's imperative to validate the compatibility of the dataset with your model and adhere to the necessary format conventions. Properly structured datasets are pivotal for training efficient object detection models with oriented bounding boxes. diff --git a/docs/datasets/pose/coco.md b/docs/datasets/pose/coco.md index 715cad8194..899a667c0a 100644 --- a/docs/datasets/pose/coco.md +++ b/docs/datasets/pose/coco.md @@ -53,7 +53,7 @@ To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 epochs with an im model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='coco-pose.yaml', epochs=100, imgsz=640) + results = model.train(data='coco-pose.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -77,15 +77,19 @@ The example showcases the variety and complexity of the images in the COCO-Pose If you use the COCO-Pose dataset in your research or development work, please cite the following paper: -```bibtex -@misc{lin2015microsoft, - title={Microsoft COCO: Common Objects in Context}, - author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár}, - year={2015}, - eprint={1405.0312}, - archivePrefix={arXiv}, - primaryClass={cs.CV} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @misc{lin2015microsoft, + title={Microsoft COCO: Common Objects in Context}, + author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. 
Lawrence Zitnick and Piotr Dollár}, + year={2015}, + eprint={1405.0312}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO-Pose dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home). diff --git a/docs/datasets/pose/coco8-pose.md b/docs/datasets/pose/coco8-pose.md index ed6cfb67df..8125e59e11 100644 --- a/docs/datasets/pose/coco8-pose.md +++ b/docs/datasets/pose/coco8-pose.md @@ -42,7 +42,7 @@ To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an i model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -66,15 +66,19 @@ The example showcases the variety and complexity of the images in the COCO8-Pose If you use the COCO dataset in your research or development work, please cite the following paper: -```bibtex -@misc{lin2015microsoft, - title={Microsoft COCO: Common Objects in Context}, - author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár}, - year={2015}, - eprint={1405.0312}, - archivePrefix={arXiv}, - primaryClass={cs.CV} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @misc{lin2015microsoft, + title={Microsoft COCO: Common Objects in Context}, + author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár}, + year={2015}, + eprint={1405.0312}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home). 
diff --git a/docs/datasets/pose/index.md b/docs/datasets/pose/index.md index 18cb134089..56a6c35e0e 100644 --- a/docs/datasets/pose/index.md +++ b/docs/datasets/pose/index.md @@ -78,7 +78,7 @@ For example if we assume five keypoints of facial landmark: [left eye, right eye model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='coco128-pose.yaml', epochs=100, imgsz=640) + results = model.train(data='coco128-pose.yaml', epochs=100, imgsz=640) ``` === "CLI" diff --git a/docs/datasets/segment/coco.md b/docs/datasets/segment/coco.md index 1f144c2324..c1810fd026 100644 --- a/docs/datasets/segment/coco.md +++ b/docs/datasets/segment/coco.md @@ -52,7 +52,7 @@ To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an imag model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='coco-seg.yaml', epochs=100, imgsz=640) + results = model.train(data='coco-seg.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -76,15 +76,19 @@ The example showcases the variety and complexity of the images in the COCO-Seg d If you use the COCO-Seg dataset in your research or development work, please cite the original COCO paper and acknowledge the extension to COCO-Seg: -```bibtex -@misc{lin2015microsoft, - title={Microsoft COCO: Common Objects in Context}, - author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár}, - year={2015}, - eprint={1405.0312}, - archivePrefix={arXiv}, - primaryClass={cs.CV} -} -``` +!!! note "" + + === "BibTeX" + + ```bibtex + @misc{lin2015microsoft, + title={Microsoft COCO: Common Objects in Context}, + author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár}, + year={2015}, + eprint={1405.0312}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` We extend our thanks to the COCO Consortium for creating and maintaining this invaluable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home). diff --git a/docs/datasets/segment/coco8-seg.md b/docs/datasets/segment/coco8-seg.md index d27e22d955..e4305cdf75 100644 --- a/docs/datasets/segment/coco8-seg.md +++ b/docs/datasets/segment/coco8-seg.md @@ -42,7 +42,7 @@ To train a YOLOv8n-seg model on the COCO8-Seg dataset for 100 epochs with an ima model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='coco8-seg.yaml', epochs=100, imgsz=640) + results = model.train(data='coco8-seg.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -66,15 +66,19 @@ The example showcases the variety and complexity of the images in the COCO8-Seg If you use the COCO dataset in your research or development work, please cite the following paper: -```bibtex -@misc{lin2015microsoft, - title={Microsoft COCO: Common Objects in Context}, - author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár}, - year={2015}, - eprint={1405.0312}, - archivePrefix={arXiv}, - primaryClass={cs.CV} -} -``` +!!! 
note "" + + === "BibTeX" + + ```bibtex + @misc{lin2015microsoft, + title={Microsoft COCO: Common Objects in Context}, + author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár}, + year={2015}, + eprint={1405.0312}, + archivePrefix={arXiv}, + primaryClass={cs.CV} + } + ``` We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home). diff --git a/docs/datasets/segment/index.md b/docs/datasets/segment/index.md index b5e279c0b1..915feca49a 100644 --- a/docs/datasets/segment/index.md +++ b/docs/datasets/segment/index.md @@ -79,7 +79,7 @@ The `train` and `val` fields specify the paths to the directories containing the model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training) # Train the model - model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) ``` === "CLI" diff --git a/docs/guides/kfold-cross-validation.md b/docs/guides/kfold-cross-validation.md index 2accea6db1..41854d445a 100644 --- a/docs/guides/kfold-cross-validation.md +++ b/docs/guides/kfold-cross-validation.md @@ -246,7 +246,7 @@ fold_lbl_distrb.to_csv(save_path / "kfold_label_distribution.csv") results = {} for k in range(ksplit): dataset_yaml = ds_yamls[k] - model.train(data=dataset_yaml, *args, **kwargs) # Include any training arguments + results = model.train(data=dataset_yaml, *args, **kwargs) # Include any training arguments results[k] = model.metrics # save output metrics for further analysis ``` diff --git a/docs/models/yolov3.md b/docs/models/yolov3.md index efe216041d..70676abebc 100644 --- a/docs/models/yolov3.md +++ b/docs/models/yolov3.md @@ -55,7 +55,7 @@ You can use YOLOv3 for object detection tasks using the Ultralytics repository. This example provides simple inference code for YOLOv3. For more options including handling inference results see [Predict](../modes/predict.md) mode. For using YOLOv3 with additional modes see [Train](../modes/train.md), [Val](../modes/val.md) and [Export](../modes/export.md). - === "Python" + === "Python" PyTorch pretrained `*.pt` models as well as configuration `*.yaml` files can be passed to the `YOLO()` class to create a model instance in python: diff --git a/docs/modes/train.md b/docs/modes/train.md index f9d844a564..b806b077e8 100644 --- a/docs/modes/train.md +++ b/docs/modes/train.md @@ -31,7 +31,7 @@ Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. See Argum model = YOLO('yolov8n.yaml').load('yolov8n.pt') # build from YAML and transfer weights # Train the model - model.train(data='coco128.yaml', epochs=100, imgsz=640) + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) ``` === "CLI" @@ -61,7 +61,7 @@ The training device can be specified using the `device` argument. 
If no argument model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) # Train the model with 2 GPUs - model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device=[0, 1]) ``` === "CLI" @@ -87,7 +87,7 @@ To enable training on Apple M1 and M2 chips, you should specify 'mps' as your de model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training) # Train the model with 2 GPUs - model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') + results = model.train(data='coco128.yaml', epochs=100, imgsz=640, device='mps') ``` === "CLI" @@ -119,7 +119,7 @@ Below is an example of how to resume an interrupted training using Python and vi model = YOLO('path/to/last.pt') # load a partially trained model # Resume training - model.train(resume=True) + results = model.train(resume=True) ``` === "CLI" diff --git a/docs/reference/data/converter.md b/docs/reference/data/converter.md index ca199e1f16..6b4c3f7738 100644 --- a/docs/reference/data/converter.md +++ b/docs/reference/data/converter.md @@ -18,17 +18,17 @@ keywords: Ultralytics, Data Converter, coco91_to_coco80_class, merge_multi_segme

--- -## ::: ultralytics.data.converter.rle2polygon +## ::: ultralytics.data.converter.convert_dota_to_yolo_obb

--- -## ::: ultralytics.data.converter.min_index +## ::: ultralytics.data.converter.rle2polygon

--- -## ::: ultralytics.data.converter.merge_multi_segment +## ::: ultralytics.data.converter.min_index

--- -## ::: ultralytics.data.converter.delete_dsstore +## ::: ultralytics.data.converter.merge_multi_segment

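The converter reference above now lists `convert_dota_to_yolo_obb`, implemented later in this patch. A minimal usage sketch, assuming a local DOTA checkout laid out as the function's docstring describes (the dataset path is a placeholder):

```python
from ultralytics.data.converter import convert_dota_to_yolo_obb

# Expects DOTA-style folders: images/{train,val} with .png files and
# labels/{train_original,val_original} with the original DOTA annotations.
# Writes one "class_idx x1 y1 x2 y2 x3 y3 x4 y4" line per object, with corner
# coordinates normalized by image width/height, into labels/{train,val}.
convert_dota_to_yolo_obb('../datasets/DOTAv2')  # placeholder path
```

Note that the class names in the original DOTA label files must match the converter's internal class-name mapping exactly, or the conversion will raise a `KeyError`.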
diff --git a/docs/reference/utils/ops.md b/docs/reference/utils/ops.md index 1dde402aa1..d45fd90606 100644 --- a/docs/reference/utils/ops.md +++ b/docs/reference/utils/ops.md @@ -77,6 +77,14 @@ keywords: Ultralytics YOLO, Utility Operations, segment2box, make_divisible, cli ## ::: ultralytics.utils.ops.ltwh2xywh

+--- +## ::: ultralytics.utils.ops.xyxyxyxy2xywhr +

+ +--- +## ::: ultralytics.utils.ops.xywhr2xyxyxyxy +

+ --- ## ::: ultralytics.utils.ops.ltwh2xyxy

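The two new ops entries, `xyxyxyxy2xywhr` and `xywhr2xyxyxyxy` (implemented in `ultralytics/utils/ops.py` further down in this patch), convert oriented boxes between a 4-corner `(n, 8)` layout and a `(n, 5)` center/width/height/rotation layout with rotation in degrees. A small NumPy round-trip sketch with illustrative values:

```python
import numpy as np
from ultralytics.utils.ops import xywhr2xyxyxyxy, xyxyxyxy2xywhr

# One oriented box: centre (50, 50), 40 x 20, rotated 30 degrees
obb = np.array([[50.0, 50.0, 40.0, 20.0, 30.0]])

corners = xywhr2xyxyxyxy(obb)        # shape (1, 8): x1 y1 x2 y2 x3 y3 x4 y4
recovered = xyxyxyxy2xywhr(corners)  # back to (1, 5): cx cy w h rotation (degrees)

print(corners.round(2))
print(recovered.round(2))  # approximately [[50. 50. 40. 20. 30.]]
```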
diff --git a/docs/tasks/classify.md b/docs/tasks/classify.md index c179df69bb..1abd86c320 100644 --- a/docs/tasks/classify.md +++ b/docs/tasks/classify.md @@ -59,7 +59,7 @@ see the [Configuration](../usage/cfg.md) page. model = YOLO('yolov8n-cls.yaml').load('yolov8n-cls.pt') # build from YAML and transfer weights # Train the model - model.train(data='mnist160', epochs=100, imgsz=64) + results = model.train(data='mnist160', epochs=100, imgsz=64) ``` === "CLI" diff --git a/docs/tasks/detect.md b/docs/tasks/detect.md index 0b730c4d6a..29609e6c62 100644 --- a/docs/tasks/detect.md +++ b/docs/tasks/detect.md @@ -51,7 +51,7 @@ Train YOLOv8n on the COCO128 dataset for 100 epochs at image size 640. For a ful model = YOLO('yolov8n.yaml').load('yolov8n.pt') # build from YAML and transfer weights # Train the model - model.train(data='coco128.yaml', epochs=100, imgsz=640) + results = model.train(data='coco128.yaml', epochs=100, imgsz=640) ``` === "CLI" diff --git a/docs/tasks/pose.md b/docs/tasks/pose.md index 13643d1662..f6bde385d1 100644 --- a/docs/tasks/pose.md +++ b/docs/tasks/pose.md @@ -62,7 +62,7 @@ Train a YOLOv8-pose model on the COCO128-pose dataset. model = YOLO('yolov8n-pose.yaml').load('yolov8n-pose.pt') # build from YAML and transfer weights # Train the model - model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) + results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640) ``` === "CLI" diff --git a/docs/tasks/segment.md b/docs/tasks/segment.md index c8daabd291..4abea007aa 100644 --- a/docs/tasks/segment.md +++ b/docs/tasks/segment.md @@ -59,7 +59,7 @@ arguments see the [Configuration](../usage/cfg.md) page. model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt') # build from YAML and transfer weights # Train the model - model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) + results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640) ``` === "CLI" diff --git a/docs/usage/python.md b/docs/usage/python.md index 610d927749..bc6cbd29a8 100644 --- a/docs/usage/python.md +++ b/docs/usage/python.md @@ -52,7 +52,7 @@ accurately predict the classes and locations of objects in an image. from ultralytics import YOLO model = YOLO('yolov8n.pt') # pass any model type - model.train(epochs=5) + results = model.train(epochs=5) ``` === "From scratch" @@ -60,13 +60,13 @@ accurately predict the classes and locations of objects in an image. 
from ultralytics import YOLO model = YOLO('yolov8n.yaml') - model.train(data='coco128.yaml', epochs=5) + results = model.train(data='coco128.yaml', epochs=5) ``` === "Resume" ```python model = YOLO("last.pt") - model.train(resume=True) + results = model.train(resume=True) ``` [Train Examples](../modes/train.md){ .md-button .md-button--primary} diff --git a/mkdocs.yml b/mkdocs.yml index e01b111476..2beadc6c1d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -207,6 +207,9 @@ nav: - Imagenette: datasets/classify/imagenette.md - Imagewoof: datasets/classify/imagewoof.md - MNIST: datasets/classify/mnist.md + - Oriented Bounding Boxes (OBB): + - datasets/obb/index.md + - DOTAv2: datasets/obb/dota-v2.md - Multi-Object Tracking: - datasets/track/index.md - Guides: diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 8111704d7f..fbfcfbb479 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = '8.0.150' +__version__ = '8.0.151' from ultralytics.hub import start from ultralytics.models import RTDETR, SAM, YOLO diff --git a/ultralytics/cfg/datasets/DOTAv2.yaml b/ultralytics/cfg/datasets/DOTAv2.yaml new file mode 100644 index 0000000000..c663bdd5c4 --- /dev/null +++ b/ultralytics/cfg/datasets/DOTAv2.yaml @@ -0,0 +1,37 @@ +# Ultralytics YOLO 🚀, AGPL-3.0 license +# DOTA 2.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University +# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv2.yaml +# parent +# ├── ultralytics +# └── datasets +# └── dota2 ← downloads here (2GB) + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +path: ../datasets/DOTAv2 # dataset root dir +train: images/train # train images (relative to 'path') 1411 images +val: images/val # val images (relative to 'path') 458 images +test: images/test # test images (optional) 937 images + +# Classes for DOTA 2.0 +names: + 0: plane + 1: ship + 2: storage tank + 3: baseball diamond + 4: tennis court + 5: basketball court + 6: ground track field + 7: harbor + 8: bridge + 9: large vehicle + 10: small vehicle + 11: helicopter + 12: roundabout + 13: soccer ball field + 14: swimming pool + 15: container crane + 16: airport + 17: helipad + +# Download script/URL (optional) +download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv2.zip diff --git a/ultralytics/data/converter.py b/ultralytics/data/converter.py index 38b414730e..32568858b8 100644 --- a/ultralytics/data/converter.py +++ b/ultralytics/data/converter.py @@ -117,6 +117,97 @@ def convert_coco(labels_dir='../coco/annotations/', use_segments=False, use_keyp file.write(('%g ' * len(line)).rstrip() % line + '\n') +def convert_dota_to_yolo_obb(dota_root_path: str): + """ + Converts DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format. + + The function processes images in the 'train' and 'val' folders of the DOTA dataset. For each image, it reads the + associated label from the original labels directory and writes new labels in YOLO OBB format to a new directory. + + Args: + dota_root_path (str): The root directory path of the DOTA dataset. 
+ + Example: + ```python + from ultralytics.data.converter import convert_dota_to_yolo_obb + + convert_dota_to_yolo_obb('path/to/DOTA') + ``` + + Notes: + The directory structure assumed for the DOTA dataset: + - DOTA + - images + - train + - val + - labels + - train_original + - val_original + + After the function execution, the new labels will be saved in: + - DOTA + - labels + - train + - val + """ + dota_root_path = Path(dota_root_path) + + # Class names to indices mapping + class_mapping = { + 'plane': 0, + 'ship': 1, + 'storage-tank': 2, + 'baseball-diamond': 3, + 'tennis-court': 4, + 'basketball-court': 5, + 'ground-track-field': 6, + 'harbor': 7, + 'bridge': 8, + 'large-vehicle': 9, + 'small-vehicle': 10, + 'helicopter': 11, + 'roundabout': 12, + 'soccer ball-field': 13, + 'swimming-pool': 14, + 'container-crane': 15, + 'airport': 16, + 'helipad': 17} + + def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir): + orig_label_path = orig_label_dir / f'{image_name}.txt' + save_path = save_dir / f'{image_name}.txt' + + with orig_label_path.open('r') as f, save_path.open('w') as g: + lines = f.readlines() + for line in lines: + parts = line.strip().split() + if len(parts) < 9: + continue + class_name = parts[8] + class_idx = class_mapping[class_name] + coords = [float(p) for p in parts[:8]] + normalized_coords = [ + coords[i] / image_width if i % 2 == 0 else coords[i] / image_height for i in range(8)] + formatted_coords = ['{:.6g}'.format(coord) for coord in normalized_coords] + g.write(f"{class_idx} {' '.join(formatted_coords)}\n") + + for phase in ['train', 'val']: + image_dir = dota_root_path / 'images' / phase + orig_label_dir = dota_root_path / 'labels' / f'{phase}_original' + save_dir = dota_root_path / 'labels' / phase + + save_dir.mkdir(parents=True, exist_ok=True) + + image_paths = list(image_dir.iterdir()) + for image_path in tqdm(image_paths, desc=f'Processing {phase} images'): + if image_path.suffix != '.png': + continue + image_name_without_ext = image_path.stem + img = cv2.imread(str(image_path)) + h, w = img.shape[:2] + convert_label(image_name_without_ext, w, h, orig_label_dir, save_dir) + + def rle2polygon(segmentation): """ Convert Run-Length Encoding (RLE) mask to polygon coordinates. @@ -209,24 +300,3 @@ def merge_multi_segment(segments): nidx = abs(idx[1] - idx[0]) s.append(segments[i][nidx:]) return s - - -def delete_dsstore(path='../datasets'): - """Delete Apple .DS_Store files in the specified directory and its subdirectories.""" - from pathlib import Path - - files = list(Path(path).rglob('.DS_store')) - print(files) - for f in files: - f.unlink() - - -if __name__ == '__main__': - source = 'COCO' - - if source == 'COCO': - convert_coco( - '../datasets/coco/annotations', # directory with *.json - use_segments=False, - use_keypoints=True, - cls91to80=False) diff --git a/ultralytics/data/utils.py b/ultralytics/data/utils.py index 807ddad95b..a46f0b71f7 100644 --- a/ultralytics/data/utils.py +++ b/ultralytics/data/utils.py @@ -24,7 +24,7 @@ from ultralytics.utils.checks import check_file, check_font, is_ascii from ultralytics.utils.downloads import download, safe_download, unzip_file from ultralytics.utils.ops import segments2boxes -HELP_URL = 'See https://docs.ultralytics.com/datasets/detect for YOLO dataset format help.' +HELP_URL = 'See https://docs.ultralytics.com/datasets/detect for dataset formatting guidance.' 
IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm' # image suffixes VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv', 'webm' # video suffixes PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders @@ -289,9 +289,6 @@ def check_cls_dataset(dataset: str, split=''): - 'test' (Path): The directory path containing the test set of the dataset. - 'nc' (int): The number of classes in the dataset. - 'names' (dict): A dictionary of class names in the dataset. - - Raises: - FileNotFoundError: If the specified dataset is not found and cannot be downloaded. """ dataset = Path(dataset) @@ -329,13 +326,16 @@ class HUBDatasetStats(): task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'. autodownload (bool): Attempt to download dataset if not found locally. Default is False. - Usage + Example: + ```python from ultralytics.data.utils import HUBDatasetStats - stats = HUBDatasetStats('/Users/glennjocher/Downloads/coco8.zip', task='detect') # detect dataset - stats = HUBDatasetStats('/Users/glennjocher/Downloads/coco8-seg.zip', task='segment') # segment dataset - stats = HUBDatasetStats('/Users/glennjocher/Downloads/coco8-pose.zip', task='pose') # pose dataset + + stats = HUBDatasetStats('path/to/coco8.zip', task='detect') # detect dataset + stats = HUBDatasetStats('path/to/coco8-seg.zip', task='segment') # segment dataset + stats = HUBDatasetStats('path/to/coco8-pose.zip', task='pose') # pose dataset stats.get_json(save=False) stats.process_images() + ``` """ def __init__(self, path='coco128.yaml', task='detect', autodownload=False): @@ -459,11 +459,14 @@ def compress_one_image(f, f_new=None, max_dim=1920, quality=50): max_dim (int, optional): The maximum dimension (width or height) of the output image. Default is 1920 pixels. quality (int, optional): The image compression quality as a percentage. Default is 50%. - Usage: + Example: + ```python from pathlib import Path from ultralytics.data.utils import compress_one_image - for f in Path('/Users/glennjocher/Downloads/dataset').rglob('*.jpg'): + + for f in Path('path/to/dataset').rglob('*.jpg'): compress_one_image(f) + ``` """ try: # use PIL im = Image.open(f) @@ -488,9 +491,12 @@ def delete_dsstore(path): Args: path (str, optional): The directory path where the ".DS_store" files should be deleted. - Usage: + Example: + ```python from ultralytics.data.utils import delete_dsstore - delete_dsstore('/Users/glennjocher/Downloads/dataset') + + delete_dsstore('path/to/dir') + ``` Note: ".DS_store" files are created by the Apple operating system and contain metadata about folders and files. They @@ -505,17 +511,18 @@ def delete_dsstore(path): def zip_directory(dir, use_zipfile_library=True): """ - Zips a directory and saves the archive to the specified output path. + Zips a directory and saves the archive to the specified output path. Equivalent to 'zip -r coco8.zip coco8/' Args: dir (str): The path to the directory to be zipped. use_zipfile_library (bool): Whether to use zipfile library or shutil for zipping. 
- Usage: + Example: + ```python from ultralytics.data.utils import zip_directory - zip_directory('/Users/glennjocher/Downloads/playground') - zip -r coco8-pose.zip coco8-pose + zip_directory('/path/to/dir') + ``` """ delete_dsstore(dir) if use_zipfile_library: @@ -538,9 +545,12 @@ def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), ann weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0). annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False. - Usage: - from utils.dataloaders import autosplit + Example: + ```python + from ultralytics.utils.dataloaders import autosplit + autosplit() + ``` """ path = Path(path) # images dir diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py index 5f65cdf7f5..d729526223 100644 --- a/ultralytics/utils/ops.py +++ b/ultralytics/utils/ops.py @@ -357,14 +357,15 @@ def scale_image(masks, im0_shape, ratio_pad=None): def xyxy2xywh(x): """ - Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format. + Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format where (x1, y1) is the + top-left corner and (x2, y2) is the bottom-right corner. Args: x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format. Returns: - y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format. + y (np.ndarray | torch.Tensor): The bounding box coordinates in (x, y, width, height) format. """ - y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center y[..., 2] = x[..., 2] - x[..., 0] # width @@ -382,11 +383,13 @@ def xywh2xyxy(x): Returns: y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format. """ - y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) - y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x - y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y - y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x - y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y + y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) + dw = x[..., 2] / 2 # half-width + dh = x[..., 3] / 2 # half-height + y[..., 0] = x[..., 0] - dw # top left x + y[..., 1] = x[..., 1] - dh # top left y + y[..., 2] = x[..., 0] + dw # bottom right x + y[..., 3] = x[..., 1] + dh # bottom right y return y @@ -404,7 +407,7 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box. 
""" - y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw # bottom right x @@ -428,7 +431,7 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): """ if clip: clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip - y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w # x center y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h # y center y[..., 2] = (x[..., 2] - x[..., 0]) / w # width @@ -449,7 +452,7 @@ def xyn2xy(x, w=640, h=640, padw=0, padh=0): Returns: y (np.ndarray | torch.Tensor): The x and y coordinates of the top left corner of the bounding box """ - y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) y[..., 0] = w * x[..., 0] + padw # top left x y[..., 1] = h * x[..., 1] + padh # top left y return y @@ -464,7 +467,7 @@ def xywh2ltwh(x): Returns: y (np.ndarray | torch.Tensor): The bounding box coordinates in the xyltwh format """ - y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y return y @@ -479,7 +482,7 @@ def xyxy2ltwh(x): Returns: y (np.ndarray | torch.Tensor): The bounding box coordinates in the xyltwh format. """ - y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) y[:, 2] = x[:, 2] - x[:, 0] # width y[:, 3] = x[:, 3] - x[:, 1] # height return y @@ -492,12 +495,91 @@ def ltwh2xywh(x): Args: x (torch.Tensor): the input tensor """ - y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) y[:, 0] = x[:, 0] + x[:, 2] / 2 # center x y[:, 1] = x[:, 1] + x[:, 3] / 2 # center y return y +def xyxyxyxy2xywhr(corners): + """ + Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation]. + + Args: + corners (numpy.ndarray | torch.Tensor): Input corners of shape (n, 8). + + Returns: + (numpy.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format of shape (n, 5). + """ + if isinstance(corners, torch.Tensor): + is_numpy = False + atan2 = torch.atan2 + sqrt = torch.sqrt + else: + is_numpy = True + atan2 = np.arctan2 + sqrt = np.sqrt + + x1, y1, x2, y2, x3, y3, x4, y4 = corners.T + cx = (x1 + x3) / 2 + cy = (y1 + y3) / 2 + dx21 = x2 - x1 + dy21 = y2 - y1 + + w = sqrt(dx21 ** 2 + dy21 ** 2) + h = sqrt((x2 - x3) ** 2 + (y2 - y3) ** 2) + + rotation = atan2(-dy21, dx21) + rotation *= 180.0 / math.pi # radians to degrees + + return np.vstack((cx, cy, w, h, rotation)).T if is_numpy else torch.stack((cx, cy, w, h, rotation), dim=1) + + +def xywhr2xyxyxyxy(center): + """ + Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4]. + + Args: + center (numpy.ndarray | torch.Tensor): Input data in [cx, cy, w, h, rotation] format of shape (n, 5). + + Returns: + (numpy.ndarray | torch.Tensor): Converted corner points of shape (n, 8). 
+ """ + if isinstance(center, torch.Tensor): + is_numpy = False + cos = torch.cos + sin = torch.sin + else: + is_numpy = True + cos = np.cos + sin = np.sin + + cx, cy, w, h, rotation = center.T + rotation *= math.pi / 180.0 # degrees to radians + + dx = w / 2 + dy = h / 2 + + cos_rot = cos(rotation) + sin_rot = sin(rotation) + dx_cos_rot = dx * cos_rot + dx_sin_rot = dx * sin_rot + dy_cos_rot = dy * cos_rot + dy_sin_rot = dy * sin_rot + + x1 = cx - dx_cos_rot - dy_sin_rot + y1 = cy + dx_sin_rot - dy_cos_rot + x2 = cx + dx_cos_rot - dy_sin_rot + y2 = cy - dx_sin_rot - dy_cos_rot + x3 = cx + dx_cos_rot + dy_sin_rot + y3 = cy - dx_sin_rot + dy_cos_rot + x4 = cx - dx_cos_rot + dy_sin_rot + y4 = cy + dx_sin_rot + dy_cos_rot + + return np.vstack((x1, y1, x2, y2, x3, y3, x4, y4)).T if is_numpy else torch.stack( + (x1, y1, x2, y2, x3, y3, x4, y4), dim=1) + + def ltwh2xyxy(x): """ It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right @@ -508,7 +590,7 @@ def ltwh2xyxy(x): Returns: y (np.ndarray | torch.Tensor): the xyxy coordinates of the bounding boxes. """ - y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) y[:, 2] = x[:, 2] + x[:, 0] # width y[:, 3] = x[:, 3] + x[:, 1] # height return y
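To close, a minimal round-trip sketch of the axis-aligned converters touched above, which now write into a freshly allocated `empty_like` output instead of a clone of the input. The sketch deliberately sticks to `xywh2xyxy`/`xyxy2xywh`, which fill every output column; the `ltwh` helpers in this diff assign only two of the four columns, so their remaining values are no longer carried over from the input.

```python
import torch
from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh

# A single box: centre (50, 50), width 40, height 20
xywh = torch.tensor([[50.0, 50.0, 40.0, 20.0]])

xyxy = xywh2xyxy(xywh)  # -> tensor([[30., 40., 70., 60.]]) as x1, y1, x2, y2
assert torch.allclose(xyxy2xywh(xyxy), xywh)  # round-trip recovers the original box
```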