@ -18,7 +18,7 @@ from PIL import Image, ImageOps
from ultralytics . nn . autobackend import check_class_names
from ultralytics . utils import ( DATASETS_DIR , LOGGER , NUM_THREADS , ROOT , SETTINGS_YAML , TQDM , clean_url , colorstr ,
emojis , yaml_load )
emojis , yaml_load , yaml_save )
from ultralytics . utils . checks import check_file , check_font , is_ascii
from ultralytics . utils . downloads import download , safe_download , unzip_file
from ultralytics . utils . ops import segments2boxes
@ -250,28 +250,26 @@ def check_det_dataset(dataset, autodownload=True):
( dict ) : Parsed dataset information and paths .
"""
data = check_file ( dataset )
file = check_file ( dataset )
# Download (optional)
extract_dir = ' '
if isinstance ( data , ( str , Path ) ) and ( zipfile . is_zipfile ( data ) or is_tarfile ( data ) ) :
new_dir = safe_download ( data , dir = DATASETS_DIR , unzip = True , delete = False )
data = find_dataset_yaml ( DATASETS_DIR / new_dir )
extract_dir , autodownload = data . parent , False
if zipfile . is_zipfile ( file ) or is_tarfile ( file ) :
new_dir = safe_download ( file , dir = DATASETS_DIR , unzip = True , delete = False )
file = find_dataset_yaml ( DATASETS_DIR / new_dir )
extract_dir , autodownload = file . parent , False
# Read YAML (optional)
if isinstance ( data , ( str , Path ) ) :
data = yaml_load ( data , append_filename = True ) # dictionary
# Read YAML
data = yaml_load ( file , append_filename = True ) # dictionary
# Checks
for k in ' train ' , ' val ' :
if k not in data :
if k == ' val ' and ' validation ' in data :
LOGGER . info ( " WARNING ⚠️ renaming data YAML ' validation ' key to ' val ' to match YOLO format. " )
data [ ' val ' ] = data . pop ( ' validation ' ) # replace 'validation' key with 'val' key
else :
if k != ' val ' or ' validation ' not in data :
raise SyntaxError (
emojis ( f " { dataset } ' { k } : ' key missing ❌. \n ' train ' and ' val ' are required in all data YAMLs. " ) )
LOGGER . info ( " WARNING ⚠️ renaming data YAML ' validation ' key to ' val ' to match YOLO format. " )
data [ ' val ' ] = data . pop ( ' validation ' ) # replace 'validation' key with 'val' key
if ' names ' not in data and ' nc ' not in data :
raise SyntaxError ( emojis ( f " { dataset } key missing ❌. \n either ' names ' or ' nc ' are required in all data YAMLs. " ) )
if ' names ' in data and ' nc ' in data and len ( data [ ' names ' ] ) != data [ ' nc ' ] :
@ -285,9 +283,10 @@ def check_det_dataset(dataset, autodownload=True):
# Resolve paths
path = Path ( extract_dir or data . get ( ' path ' ) or Path ( data . get ( ' yaml_file ' , ' ' ) ) . parent ) # dataset root
if not path . is_absolute ( ) :
path = ( DATASETS_DIR / path ) . resolve ( )
# Set paths
data [ ' path ' ] = path # download scripts
for k in ' train ' , ' val ' , ' test ' :
if data . get ( k ) : # prepend path
@ -404,7 +403,7 @@ class HUBDatasetStats:
A class for generating HUB dataset JSON and ` - hub ` dataset directory .
Args :
path ( str ) : Path to data . yaml or data . zip ( with data . yaml inside data . zip ) . Default is ' coco12 8.yaml ' .
path ( str ) : Path to data . yaml or data . zip ( with data . yaml inside data . zip ) . Default is ' coco8.yaml ' .
task ( str ) : Dataset task . Options are ' detect ' , ' segment ' , ' pose ' , ' classify ' . Default is ' detect ' .
autodownload ( bool ) : Attempt to download dataset if not found locally . Default is False .
@ -424,7 +423,7 @@ class HUBDatasetStats:
` ` `
"""
def __init__ ( self , path = ' coco12 8.yaml ' , task = ' detect ' , autodownload = False ) :
def __init__ ( self , path = ' coco8.yaml ' , task = ' detect ' , autodownload = False ) :
""" Initialize class. """
path = Path ( path ) . resolve ( )
LOGGER . info ( f ' Starting HUB dataset checks for { path } .... ' )
@ -437,10 +436,12 @@ class HUBDatasetStats:
else : # detect, segment, pose
zipped , data_dir , yaml_path = self . _unzip ( Path ( path ) )
try :
# data = yaml_load(check_yaml(yaml_path)) # data dict
data = check_det_dataset ( yaml_path , autodownload ) # data dict
if zipped :
data [ ' path ' ] = data_dir
# Load YAML with checks
data = yaml_load ( yaml_path )
data [ ' path ' ] = ' ' # strip path since YAML should be in dataset root for all HUB datasets
yaml_save ( yaml_path , data )
data = check_det_dataset ( yaml_path , autodownload ) # dict
data [ ' path ' ] = data_dir # YAML path should be set to '' (relative) or parent (absolute)
except Exception as e :
raise Exception ( ' error/HUB/dataset_stats/init ' ) from e