|
|
|
@ -1,6 +1,5 @@ |
|
|
|
|
# Ultralytics YOLO 🚀, AGPL-3.0 license |
|
|
|
|
|
|
|
|
|
import contextlib |
|
|
|
|
import hashlib |
|
|
|
|
import json |
|
|
|
|
import os |
|
|
|
@ -49,13 +48,14 @@ def get_hash(paths): |
|
|
|
|
return h.hexdigest() # return hash |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def exif_size(img): |
|
|
|
|
def exif_size(img: Image.Image): |
|
|
|
|
"""Returns exif-corrected PIL size.""" |
|
|
|
|
s = img.size # (width, height) |
|
|
|
|
with contextlib.suppress(Exception): |
|
|
|
|
rotation = dict(img._getexif().items())[orientation] |
|
|
|
|
exif = img.getexif() |
|
|
|
|
if exif: |
|
|
|
|
rotation = exif.get(274, None) # the key for the orientation tag in the EXIF data is 274 (in decimal) |
|
|
|
|
if rotation in [6, 8]: # rotation 270 or 90 |
|
|
|
|
s = (s[1], s[0]) |
|
|
|
|
s = s[1], s[0] |
|
|
|
|
return s |
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -190,7 +190,21 @@ def polygons2masks_overlap(imgsz, segments, downsample_ratio=1): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def check_det_dataset(dataset, autodownload=True): |
|
|
|
|
"""Download, check and/or unzip dataset if not found locally.""" |
|
|
|
|
""" |
|
|
|
|
Download, verify, and/or unzip a dataset if not found locally. |
|
|
|
|
|
|
|
|
|
This function checks the availability of a specified dataset, and if not found, it has the option to download and |
|
|
|
|
unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also |
|
|
|
|
resolves paths related to the dataset. |
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
|
dataset (str): Path to the dataset or dataset descriptor (like a YAML file). |
|
|
|
|
autodownload (bool, optional): Whether to automatically download the dataset if not found. Defaults to True. |
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
|
(dict): Parsed dataset information and paths. |
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
data = check_file(dataset) |
|
|
|
|
|
|
|
|
|
# Download (optional) |
|
|
|
@ -327,7 +341,7 @@ def check_cls_dataset(dataset: str, split=''): |
|
|
|
|
return {'train': train_set, 'val': val_set or test_set, 'test': test_set or val_set, 'nc': nc, 'names': names} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class HUBDatasetStats(): |
|
|
|
|
class HUBDatasetStats: |
|
|
|
|
""" |
|
|
|
|
A class for generating HUB dataset JSON and `-hub` dataset directory. |
|
|
|
|
|
|
|
|
@ -371,11 +385,10 @@ class HUBDatasetStats(): |
|
|
|
|
def _find_yaml(dir): |
|
|
|
|
"""Return data.yaml file.""" |
|
|
|
|
files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml')) # try root level first and then recursive |
|
|
|
|
assert files, f'No *.yaml file found in {dir}' |
|
|
|
|
assert files, f'No *.yaml file found in {dir.resolve()}' |
|
|
|
|
if len(files) > 1: |
|
|
|
|
files = [f for f in files if f.stem == dir.stem] # prefer *.yaml files that match dir name |
|
|
|
|
assert files, f'Multiple *.yaml files found in {dir}, only 1 *.yaml file allowed' |
|
|
|
|
assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}' |
|
|
|
|
assert len(files) == 1, f"Expected 1 *.yaml file in '{dir.resolve()}', but found {len(files)}.\n{files}" |
|
|
|
|
return files[0] |
|
|
|
|
|
|
|
|
|
def _unzip(self, path): |
|
|
|
@ -478,6 +491,7 @@ def compress_one_image(f, f_new=None, max_dim=1920, quality=50): |
|
|
|
|
compress_one_image(f) |
|
|
|
|
``` |
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
try: # use PIL |
|
|
|
|
im = Image.open(f) |
|
|
|
|
r = max_dim / max(im.height, im.width) # ratio |
|
|
|
@ -546,18 +560,18 @@ def zip_directory(dir, use_zipfile_library=True): |
|
|
|
|
shutil.make_archive(dir, 'zip', dir) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False): |
|
|
|
|
def autosplit(path=DATASETS_DIR / 'coco8/images', weights=(0.9, 0.1, 0.0), annotated_only=False): |
|
|
|
|
""" |
|
|
|
|
Autosplit a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files. |
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
|
path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco128/images'. |
|
|
|
|
path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco8/images'. |
|
|
|
|
weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0). |
|
|
|
|
annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False. |
|
|
|
|
|
|
|
|
|
Example: |
|
|
|
|
```python |
|
|
|
|
from ultralytics.utils.dataloaders import autosplit |
|
|
|
|
from ultralytics.data.utils import autosplit |
|
|
|
|
|
|
|
|
|
autosplit() |
|
|
|
|
``` |
|
|
|
|