|
|
|
@ -59,7 +59,7 @@ def exif_size(img: Image.Image): |
|
|
|
|
|
|
|
|
|
def verify_image(args): |
|
|
|
|
"""Verify one image.""" |
|
|
|
|
im_file, prefix = args |
|
|
|
|
(im_file, cls), prefix = args |
|
|
|
|
# Number (found, corrupt), message |
|
|
|
|
nf, nc, msg = 0, 0, '' |
|
|
|
|
try: |
|
|
|
@ -79,7 +79,7 @@ def verify_image(args): |
|
|
|
|
except Exception as e: |
|
|
|
|
nc = 1 |
|
|
|
|
msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}' |
|
|
|
|
return im_file, nf, nc, msg |
|
|
|
|
return (im_file, cls), nf, nc, msg |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def verify_image_label(args): |
|
|
|
@ -321,7 +321,7 @@ def check_cls_dataset(dataset: str, split=''): |
|
|
|
|
dataset = Path(dataset) |
|
|
|
|
data_dir = (dataset if dataset.is_dir() else (DATASETS_DIR / dataset)).resolve() |
|
|
|
|
if not data_dir.is_dir(): |
|
|
|
|
LOGGER.info(f'\nDataset not found ⚠️, missing path {data_dir}, attempting download...') |
|
|
|
|
LOGGER.warning(f'\nDataset not found ⚠️, missing path {data_dir}, attempting download...') |
|
|
|
|
t = time.time() |
|
|
|
|
if str(dataset) == 'imagenet': |
|
|
|
|
subprocess.run(f"bash {ROOT / 'data/scripts/get_imagenet.sh'}", shell=True, check=True) |
|
|
|
@ -335,9 +335,9 @@ def check_cls_dataset(dataset: str, split=''): |
|
|
|
|
data_dir / 'validation').exists() else None # data/test or data/val |
|
|
|
|
test_set = data_dir / 'test' if (data_dir / 'test').exists() else None # data/val or data/test |
|
|
|
|
if split == 'val' and not val_set: |
|
|
|
|
LOGGER.info("WARNING ⚠️ Dataset 'split=val' not found, using 'split=test' instead.") |
|
|
|
|
LOGGER.warning("WARNING ⚠️ Dataset 'split=val' not found, using 'split=test' instead.") |
|
|
|
|
elif split == 'test' and not test_set: |
|
|
|
|
LOGGER.info("WARNING ⚠️ Dataset 'split=test' not found, using 'split=val' instead.") |
|
|
|
|
LOGGER.warning("WARNING ⚠️ Dataset 'split=test' not found, using 'split=val' instead.") |
|
|
|
|
|
|
|
|
|
nc = len([x for x in (data_dir / 'train').glob('*') if x.is_dir()]) # number of classes |
|
|
|
|
names = [x.name for x in (data_dir / 'train').iterdir() if x.is_dir()] # class names list |
|
|
|
@ -345,13 +345,22 @@ def check_cls_dataset(dataset: str, split=''): |
|
|
|
|
|
|
|
|
|
# Print to console |
|
|
|
|
for k, v in {'train': train_set, 'val': val_set, 'test': test_set}.items(): |
|
|
|
|
prefix = f'{colorstr(k)} {v}...' |
|
|
|
|
if v is None: |
|
|
|
|
LOGGER.info(f'{colorstr(k)}: {v}') |
|
|
|
|
LOGGER.info(prefix) |
|
|
|
|
else: |
|
|
|
|
files = [path for path in v.rglob('*.*') if path.suffix[1:].lower() in IMG_FORMATS] |
|
|
|
|
nf = len(files) # number of files |
|
|
|
|
nd = len({file.parent for file in files}) # number of directories |
|
|
|
|
LOGGER.info(f'{colorstr(k)}: {v}... found {nf} images in {nd} classes ✅ ') # keep trailing space |
|
|
|
|
if nf == 0: |
|
|
|
|
if k == 'train': |
|
|
|
|
raise FileNotFoundError(emojis(f"{dataset} '{k}:' no training images found ❌ ")) |
|
|
|
|
else: |
|
|
|
|
LOGGER.warning(f'{prefix} found {nf} images in {nd} classes: WARNING ⚠️ no images found') |
|
|
|
|
elif nd != nc: |
|
|
|
|
LOGGER.warning(f'{prefix} found {nf} images in {nd} classes: ERROR ❌️ requires {nc} classes, not {nd}') |
|
|
|
|
else: |
|
|
|
|
LOGGER.info(f'{prefix} found {nf} images in {nd} classes ✅ ') |
|
|
|
|
|
|
|
|
|
return {'train': train_set, 'val': val_set or test_set, 'test': test_set or val_set, 'nc': nc, 'names': names} |
|
|
|
|
|
|
|
|
|