`ultralytics 8.0.217` HUB YAML `path` improvements (#6556)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
pull/6570/head v8.0.217
Glenn Jocher 12 months ago committed by GitHub
parent 8f1c3f3d1e
commit 40a349bceb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 25
      docs/en/guides/kfold-cross-validation.md
  2. 2
      docs/hi/models/sam.md
  3. 41
      ultralytics/data/utils.py

@ -24,19 +24,18 @@ Without further ado, let's dive in!
- This guide assumes that annotation files are locally available.
- For our demonstration, we use the [Fruit Detection](https://www.kaggle.com/datasets/lakshaytyagi01/fruit-detection/code) dataset.
- This dataset contains a total of 8479 images.
- It includes 6 class labels, each with its total instance counts listed below.
| Class Label | Instance Count |
|:------------|:--------------:|
| Apple | 7049 |
| Grapes | 7202 |
| Pineapple | 1613 |
| Orange | 15549 |
| Banana | 3536 |
| Watermelon | 1976 |
- For our demonstration, we use the [Fruit Detection](https://www.kaggle.com/datasets/lakshaytyagi01/fruit-detection/code) dataset.
- This dataset contains a total of 8479 images.
- It includes 6 class labels, each with its total instance counts listed below.
| Class Label | Instance Count |
|:------------|:--------------:|
| Apple | 7049 |
| Grapes | 7202 |
| Pineapple | 1613 |
| Orange | 15549 |
| Banana | 3536 |
| Watermelon | 1976 |
- Necessary Python packages include:

@ -140,7 +140,7 @@ Segment Anything Model क उपयग उपसित ड
| SAM क सबस, SAM-b | 358 MB | 94.7 M | 51096 ms/im |
| [मोबाइल SAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im |
| [अगगमशन व FastSAM-s, YOLOv8 बकबन सहि](fast-sam.md) | 23.7 MB | 11.8 M | 115 ms/im |
| Ultralytics [8न-seg](../टक/सगमट.md) | **6.7 MB** (53.4 ग) | **3.4 M** (27.9x कम) | **59 ms/im** (866x तज) |
| Ultralytics [8न-seg](yolov8.md) | **6.7 MB** (53.4 ग) | **3.4 M** (27.9x कम) | **59 ms/im** (866x तज) |
यह तलनडल क आकर और गति दसय अतर दि। जह SAM सवचित सगमशन किए अदिय कषमतरसत करत, वह Ultralytics वियमन सगमशन मनदलनमक आकर, गति और सलन कषमत समरथन परदन करत

@ -18,7 +18,7 @@ from PIL import Image, ImageOps
from ultralytics.nn.autobackend import check_class_names
from ultralytics.utils import (DATASETS_DIR, LOGGER, NUM_THREADS, ROOT, SETTINGS_YAML, TQDM, clean_url, colorstr,
emojis, yaml_load)
emojis, yaml_load, yaml_save)
from ultralytics.utils.checks import check_file, check_font, is_ascii
from ultralytics.utils.downloads import download, safe_download, unzip_file
from ultralytics.utils.ops import segments2boxes
@ -250,28 +250,26 @@ def check_det_dataset(dataset, autodownload=True):
(dict): Parsed dataset information and paths.
"""
data = check_file(dataset)
file = check_file(dataset)
# Download (optional)
extract_dir = ''
if isinstance(data, (str, Path)) and (zipfile.is_zipfile(data) or is_tarfile(data)):
new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False)
data = find_dataset_yaml(DATASETS_DIR / new_dir)
extract_dir, autodownload = data.parent, False
if zipfile.is_zipfile(file) or is_tarfile(file):
new_dir = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)
file = find_dataset_yaml(DATASETS_DIR / new_dir)
extract_dir, autodownload = file.parent, False
# Read YAML (optional)
if isinstance(data, (str, Path)):
data = yaml_load(data, append_filename=True) # dictionary
# Read YAML
data = yaml_load(file, append_filename=True) # dictionary
# Checks
for k in 'train', 'val':
if k not in data:
if k == 'val' and 'validation' in data:
LOGGER.info("WARNING ⚠ renaming data YAML 'validation' key to 'val' to match YOLO format.")
data['val'] = data.pop('validation') # replace 'validation' key with 'val' key
else:
if k != 'val' or 'validation' not in data:
raise SyntaxError(
emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs."))
LOGGER.info("WARNING ⚠ renaming data YAML 'validation' key to 'val' to match YOLO format.")
data['val'] = data.pop('validation') # replace 'validation' key with 'val' key
if 'names' not in data and 'nc' not in data:
raise SyntaxError(emojis(f"{dataset} key missing ❌.\n either 'names' or 'nc' are required in all data YAMLs."))
if 'names' in data and 'nc' in data and len(data['names']) != data['nc']:
@ -285,9 +283,10 @@ def check_det_dataset(dataset, autodownload=True):
# Resolve paths
path = Path(extract_dir or data.get('path') or Path(data.get('yaml_file', '')).parent) # dataset root
if not path.is_absolute():
path = (DATASETS_DIR / path).resolve()
# Set paths
data['path'] = path # download scripts
for k in 'train', 'val', 'test':
if data.get(k): # prepend path
@ -404,7 +403,7 @@ class HUBDatasetStats:
A class for generating HUB dataset JSON and `-hub` dataset directory.
Args:
path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco128.yaml'.
path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'.
task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'.
autodownload (bool): Attempt to download dataset if not found locally. Default is False.
@ -424,7 +423,7 @@ class HUBDatasetStats:
```
"""
def __init__(self, path='coco128.yaml', task='detect', autodownload=False):
def __init__(self, path='coco8.yaml', task='detect', autodownload=False):
"""Initialize class."""
path = Path(path).resolve()
LOGGER.info(f'Starting HUB dataset checks for {path}....')
@ -437,10 +436,12 @@ class HUBDatasetStats:
else: # detect, segment, pose
zipped, data_dir, yaml_path = self._unzip(Path(path))
try:
# data = yaml_load(check_yaml(yaml_path)) # data dict
data = check_det_dataset(yaml_path, autodownload) # data dict
if zipped:
data['path'] = data_dir
# Load YAML with checks
data = yaml_load(yaml_path)
data['path'] = '' # strip path since YAML should be in dataset root for all HUB datasets
yaml_save(yaml_path, data)
data = check_det_dataset(yaml_path, autodownload) # dict
data['path'] = data_dir # YAML path should be set to '' (relative) or parent (absolute)
except Exception as e:
raise Exception('error/HUB/dataset_stats/init') from e

Loading…
Cancel
Save