Fix PIL Image `exif_size()` function (#4355)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2 years ago · 834f94f899
parent c940d29d4f
commit 834f94f899
6 changed files with 55 additions and 23 deletions
--- a/setup.cfg
+++ b/setup.cfg
@@ -15,9 +15,6 @@ addopts =
    --doctest-modules
    --durations=25
    --color=yes
-    --cov=ultralytics/
-    --cov-report=xml
-    --no-cov-on-fail

 [coverage:run]
 source = ultralytics/
--- a/tests/test_python.py
+++ b/tests/test_python.py
@@ -1,5 +1,4 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-
 from pathlib import Path

 import cv2
@@ -221,3 +220,24 @@ def test_results():
            print(r.path)
            for k in r.keys:
                print(getattr(r, k))
+
+
+def test_data_utils():
+    # Test functions in ultralytics/data/utils.py
+    from ultralytics.data.utils import autosplit, zip_directory
+
+    # from ultralytics.utils.files import WorkingDirectory
+    # with WorkingDirectory(ROOT.parent / 'tests'):
+
+    autosplit()
+    zip_directory(ROOT / 'assets')  # zip
+    Path(ROOT / 'assets.zip').unlink()  # delete zip
+
+    # from ultralytics.data.utils import HUBDatasetStats
+    # from ultralytics.utils.downloads import download
+    # Path('coco8.zip').unlink(missing_ok=True)
+    # download('https://github.com/ultralytics/hub/raw/master/example_datasets/coco8.zip', unzip=False)
+    # shutil.move('coco8.zip', 'tests')
+    # stats = HUBDatasetStats('tests/coco8.zip', task='detect')
+    # stats.get_json(save=False)
+    # stats.process_images()
--- a/ultralytics/data/loaders.py
+++ b/ultralytics/data/loaders.py
@@ -405,14 +405,14 @@ def get_best_youtube_url(url, use_pafy=True):
    if use_pafy:
        check_requirements(('pafy', 'youtube_dl==2020.12.2'))
        import pafy  # noqa
-        return pafy.new(url).getbest(preftype='mp4').url
+        return pafy.new(url).getbestvideo(preftype='mp4').url
    else:
        check_requirements('yt-dlp')
        import yt_dlp
        with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
            info_dict = ydl.extract_info(url, download=False)  # extract info
        for f in info_dict.get('formats', None):
-            if f['vcodec'] != 'none' and f['acodec'] == 'none' and f['ext'] == 'mp4':
+            if f['vcodec'] != 'none' and f['acodec'] == 'none' and f['ext'] == 'mp4' and f.get('width') > 1280:
                return f.get('url', None)


--- a/ultralytics/data/utils.py
+++ b/ultralytics/data/utils.py
@@ -1,6 +1,5 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-import contextlib
 import hashlib
 import json
 import os
@@ -49,13 +48,14 @@ def get_hash(paths):
    return h.hexdigest()  # return hash


-def exif_size(img):
+def exif_size(img: Image.Image):
    """Returns exif-corrected PIL size."""
    s = img.size  # (width, height)
-    with contextlib.suppress(Exception):
-        rotation = dict(img._getexif().items())[orientation]
+    exif = img.getexif()
+    if exif:
+        rotation = exif.get(274, None)  # the key for the orientation tag in the EXIF data is 274 (in decimal)
        if rotation in [6, 8]:  # rotation 270 or 90
-            s = (s[1], s[0])
+            s = s[1], s[0]
    return s


@@ -190,7 +190,21 @@ def polygons2masks_overlap(imgsz, segments, downsample_ratio=1):


 def check_det_dataset(dataset, autodownload=True):
-    """Download, check and/or unzip dataset if not found locally."""
+    """
+    Download, verify, and/or unzip a dataset if not found locally.
+
+    This function checks the availability of a specified dataset, and if not found, it has the option to download and
+    unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also
+    resolves paths related to the dataset.
+
+    Args:
+        dataset (str): Path to the dataset or dataset descriptor (like a YAML file).
+        autodownload (bool, optional): Whether to automatically download the dataset if not found. Defaults to True.
+
+    Returns:
+        (dict): Parsed dataset information and paths.
+    """
+
    data = check_file(dataset)

    # Download (optional)
@@ -327,7 +341,7 @@ def check_cls_dataset(dataset: str, split=''):
    return {'train': train_set, 'val': val_set or test_set, 'test': test_set or val_set, 'nc': nc, 'names': names}


-class HUBDatasetStats():
+class HUBDatasetStats:
    """
    A class for generating HUB dataset JSON and `-hub` dataset directory.

@@ -371,11 +385,10 @@ class HUBDatasetStats():
    def _find_yaml(dir):
        """Return data.yaml file."""
        files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml'))  # try root level first and then recursive
-        assert files, f'No *.yaml file found in {dir}'
+        assert files, f'No *.yaml file found in {dir.resolve()}'
        if len(files) > 1:
            files = [f for f in files if f.stem == dir.stem]  # prefer *.yaml files that match dir name
-            assert files, f'Multiple *.yaml files found in {dir}, only 1 *.yaml file allowed'
-        assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}'
+        assert len(files) == 1, f"Expected 1 *.yaml file in '{dir.resolve()}', but found {len(files)}.\n{files}"
        return files[0]

    def _unzip(self, path):
@@ -478,6 +491,7 @@ def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
            compress_one_image(f)
        ```
    """
+
    try:  # use PIL
        im = Image.open(f)
        r = max_dim / max(im.height, im.width)  # ratio
@@ -546,18 +560,18 @@ def zip_directory(dir, use_zipfile_library=True):
        shutil.make_archive(dir, 'zip', dir)


-def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
+def autosplit(path=DATASETS_DIR / 'coco8/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
    """
    Autosplit a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.

    Args:
-        path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco128/images'.
+        path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco8/images'.
        weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0).
        annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False.

    Example:
        ```python
-        from ultralytics.utils.dataloaders import autosplit
+        from ultralytics.data.utils import autosplit

        autosplit()
        ```
--- a/ultralytics/engine/model.py
+++ b/ultralytics/engine/model.py
@@ -282,6 +282,8 @@ class Model:
        overrides['rect'] = True  # rect batches as default
        overrides.update(kwargs)
        overrides['mode'] = 'val'
+        if overrides.get('imgsz') is None:
+            overrides['imgsz'] = self.model.args['imgsz']  # use trained imgsz unless custom value is passed
        args = get_cfg(cfg=DEFAULT_CFG, overrides=overrides)
        args.data = data or args.data
        if 'task' in overrides:
@@ -289,8 +291,6 @@ class Model:
        else:
            args.task = self.task
        validator = validator or self.smart_load('validator')
-        if args.imgsz == DEFAULT_CFG.imgsz and not isinstance(self.model, (str, Path)):
-            args.imgsz = self.model.args['imgsz']  # use trained imgsz unless custom value is passed
        args.imgsz = check_imgsz(args.imgsz, max_dim=1)

        validator = validator(args=args, _callbacks=self.callbacks)
--- a/ultralytics/engine/trainer.py
+++ b/ultralytics/engine/trainer.py
@@ -5,6 +5,7 @@ Train a model on a dataset
 Usage:
    $ yolo mode=train model=yolov8n.pt data=coco128.yaml imgsz=640 epochs=100 batch=16
 """
+
 import math
 import os
 import subprocess
@@ -48,8 +49,8 @@ class BaseTrainer:
        callbacks (defaultdict): Dictionary of callbacks.
        save_dir (Path): Directory to save results.
        wdir (Path): Directory to save weights.
-        last (Path): Path to last checkpoint.
-        best (Path): Path to best checkpoint.
+        last (Path): Path to the last checkpoint.
+        best (Path): Path to the best checkpoint.
        save_period (int): Save checkpoint every x epochs (disabled if < 1).
        batch_size (int): Batch size for training.
        epochs (int): Number of epochs to train for.