diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 825e90c267..8ca07774b9 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -2,4 +2,4 @@ __version__ = "8.0.0.dev0" from ultralytics.yolo.engine.model import YOLO -__all__ = ["__version__", "YOLO"] # allow simpler import +__all__ = ["__version__", "YOLO", "hub"] # allow simpler import diff --git a/ultralytics/hub/__init__.py b/ultralytics/hub/__init__.py index e69de29bb2..937c22d5dc 100644 --- a/ultralytics/hub/__init__.py +++ b/ultralytics/hub/__init__.py @@ -0,0 +1,131 @@ +import os +import shutil + +import psutil +import requests +from IPython import display # to display images and clear console output + +from ultralytics.hub.auth import Auth +from ultralytics.hub.session import HubTrainingSession +from ultralytics.hub.utils import PREFIX, split_key +from ultralytics.yolo.utils import LOGGER, emojis, is_colab +from ultralytics.yolo.utils.torch_utils import select_device +from ultralytics.yolo.v8.detect import DetectionTrainer + + +def checks(verbose=True): + if is_colab(): + shutil.rmtree('sample_data', ignore_errors=True) # remove colab /sample_data directory + + if verbose: + # System info + gib = 1 << 30 # bytes per GiB + ram = psutil.virtual_memory().total + total, used, free = shutil.disk_usage("/") + display.clear_output() + s = f'({os.cpu_count()} CPUs, {ram / gib:.1f} GB RAM, {(total - free) / gib:.1f}/{total / gib:.1f} GB disk)' + else: + s = '' + + select_device(newline=False) + LOGGER.info(f'Setup complete ✅ {s}') + + +def start(key=''): + # Start training models with Ultralytics HUB. Usage: from src.ultralytics import start; start('API_KEY') + def request_api_key(attempts=0): + """Prompt the user to input their API key""" + import getpass + + max_attempts = 3 + tries = f"Attempt {str(attempts + 1)} of {max_attempts}" if attempts > 0 else "" + LOGGER.info(f"{PREFIX}Login. {tries}") + input_key = getpass.getpass("Enter your Ultralytics HUB API key:\n") + auth.api_key, model_id = split_key(input_key) + if not auth.authenticate(): + attempts += 1 + LOGGER.warning(f"{PREFIX}Invalid API key ⚠️\n") + if attempts < max_attempts: + return request_api_key(attempts) + raise ConnectionError(emojis(f"{PREFIX}Failed to authenticate ❌")) + else: + return model_id + + try: + api_key, model_id = split_key(key) + auth = Auth(api_key) # attempts cookie login if no api key is present + attempts = 1 if len(key) else 0 + if not auth.get_state(): + if len(key): + LOGGER.warning(f"{PREFIX}Invalid API key ⚠️\n") + model_id = request_api_key(attempts) + LOGGER.info(f"{PREFIX}Authenticated ✅") + if not model_id: + raise ConnectionError(emojis('Connecting with global API key is not currently supported. ❌')) + session = HubTrainingSession(model_id=model_id, auth=auth) + session.check_disk_space() + + # TODO: refactor, hardcoded for v8 + args = session.model.copy() + args.pop("id") + args.pop("status") + args.pop("weights") + args["data"] = "coco128.yaml" + args["model"] = "yolov8n.yaml" + args["batch_size"] = 16 + args["imgsz"] = 64 + + trainer = DetectionTrainer(overrides=args) + session.register_callbacks(trainer) + setattr(trainer, 'hub_session', session) + trainer.train() + except Exception as e: + LOGGER.warning(f"{PREFIX}{e}") + + +def reset_model(key=''): + # Reset a trained model to an untrained state + api_key, model_id = split_key(key) + r = requests.post('https://api.ultralytics.com/model-reset', json={"apiKey": api_key, "modelId": model_id}) + + if r.status_code == 200: + LOGGER.info(f"{PREFIX}model reset successfully") + return + LOGGER.warning(f"{PREFIX}model reset failure {r.status_code} {r.reason}") + + +def export_model(key='', format='torchscript'): + # Export a model to all formats + api_key, model_id = split_key(key) + formats = ('torchscript', 'onnx', 'openvino', 'engine', 'coreml', 'saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs', + 'ultralytics_tflite', 'ultralytics_coreml') + assert format in formats, f"ERROR: Unsupported export format '{format}' passed, valid formats are {formats}" + + r = requests.post('https://api.ultralytics.com/export', + json={ + "apiKey": api_key, + "modelId": model_id, + "format": format}) + assert r.status_code == 200, f"{PREFIX}{format} export failure {r.status_code} {r.reason}" + LOGGER.info(f"{PREFIX}{format} export started ✅") + + +def get_export(key='', format='torchscript'): + # Get an exported model dictionary with download URL + api_key, model_id = split_key(key) + formats = ('torchscript', 'onnx', 'openvino', 'engine', 'coreml', 'saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs', + 'ultralytics_tflite', 'ultralytics_coreml') + assert format in formats, f"ERROR: Unsupported export format '{format}' passed, valid formats are {formats}" + + r = requests.post('https://api.ultralytics.com/get-export', + json={ + "apiKey": api_key, + "modelId": model_id, + "format": format}) + assert r.status_code == 200, f"{PREFIX}{format} get_export failure {r.status_code} {r.reason}" + return r.json() + + +# temp. For checking +if __name__ == "__main__": + start(key="b3fba421be84a20dbe68644e14436d1cce1b0a0aaa_HeMfHgvHsseMPhdq7Ylz") diff --git a/ultralytics/hub/auth.py b/ultralytics/hub/auth.py new file mode 100644 index 0000000000..b7a81aff92 --- /dev/null +++ b/ultralytics/hub/auth.py @@ -0,0 +1,69 @@ +import requests + +from ultralytics.hub.config import HUB_API_ROOT +from ultralytics.hub.utils import request_with_credentials +from ultralytics.yolo.utils import is_colab + +API_KEY_PATH = "https://hub.ultralytics.com/settings?tab=api+keys" + + +class Auth: + id_token = api_key = model_key = False + + def __init__(self, api_key=None): + self.api_key = self._clean_api_key(api_key) + self.authenticate() if self.api_key else self.auth_with_cookies() + + @staticmethod + def _clean_api_key(key: str) -> str: + """Strip model from key if present""" + separator = "_" + return key.split(separator)[0] if separator in key else key + + def authenticate(self) -> bool: + """Attempt to authenticate with server""" + try: + header = self.get_auth_header() + if header: + r = requests.post(f"{HUB_API_ROOT}/v1/auth", headers=header) + if not r.json().get('success', False): + raise ConnectionError("Unable to authenticate.") + return True + raise ConnectionError("User has not authenticated locally.") + except ConnectionError: + self.id_token = self.api_key = False # reset invalid + return False + + def auth_with_cookies(self) -> bool: + """ + Attempt to fetch authentication via cookies and set id_token. + User must be logged in to HUB and running in a supported browser. + """ + if not is_colab(): + return False # Currently only works with Colab + try: + authn = request_with_credentials(f"{HUB_API_ROOT}/v1/auth/auto") + if authn.get("success", False): + self.id_token = authn.get("data", {}).get("idToken", None) + self.authenticate() + return True + raise ConnectionError("Unable to fetch browser authentication details.") + except ConnectionError: + self.id_token = False # reset invalid + return False + + def get_auth_header(self): + if self.id_token: + return {"authorization": f"Bearer {self.id_token}"} + elif self.api_key: + return {"x-api-key": self.api_key} + else: + return None + + def get_state(self) -> bool: + """Get the authentication state""" + return self.id_token or self.api_key + + def set_api_key(self, key: str): + """Get the authentication state""" + self.api_key = key diff --git a/ultralytics/hub/config.py b/ultralytics/hub/config.py new file mode 100644 index 0000000000..0d219eb529 --- /dev/null +++ b/ultralytics/hub/config.py @@ -0,0 +1,12 @@ +import os + +# Global variables +REPO_URL = "https://github.com/ultralytics/yolov5.git" +REPO_BRANCH = "ultralytics/HUB" # "master" + +ENVIRONMENT = os.environ.get("ULTRALYTICS_ENV", "production") +if ENVIRONMENT == 'production': + HUB_API_ROOT = "https://api.ultralytics.com" +else: + HUB_API_ROOT = "http://127.0.0.1:8000" + print(f'Connected to development server on {HUB_API_ROOT}') diff --git a/ultralytics/hub/session.py b/ultralytics/hub/session.py new file mode 100644 index 0000000000..cdad027a5a --- /dev/null +++ b/ultralytics/hub/session.py @@ -0,0 +1,121 @@ +import signal +import sys +from pathlib import Path +from time import sleep + +import requests + +from ultralytics import __version__ +from ultralytics.hub.config import HUB_API_ROOT +from ultralytics.hub.utils import check_dataset_disk_space, smart_request +from ultralytics.yolo.utils import LOGGER, is_colab, threaded + +AGENT_NAME = f'python-{__version__}-colab' if is_colab() else f'python-{__version__}-local' + +session = None + + +def signal_handler(signum, frame): + """ Confirm exit """ + global hub_logger + LOGGER.info(f'Signal received. {signum} {frame}') + if isinstance(session, HubTrainingSession): + hub_logger.alive = False + del hub_logger + sys.exit(signum) + + +signal.signal(signal.SIGTERM, signal_handler) +signal.signal(signal.SIGINT, signal_handler) + + +class HubTrainingSession: + + def __init__(self, model_id, auth): + self.agent_id = None # identifies which instance is communicating with server + self.model_id = model_id + self.api_url = f'{HUB_API_ROOT}/v1/models/{model_id}' + self.auth_header = auth.get_auth_header() + self.rate_limits = {'metrics': 3.0, 'ckpt': 900.0, 'heartbeat': 300.0} # rate limits (seconds) + self.t = {} # rate limit timers (seconds) + self.metrics_queue = {} # metrics queue + self.alive = True # for heartbeats + self.model = self._get_model() + self._heartbeats() # start heartbeats + + def __del__(self): + # Class destructor + self.alive = False + + def upload_metrics(self): + payload = {"metrics": self.metrics_queue.copy(), "type": "metrics"} + smart_request(f'{self.api_url}', json=payload, headers=self.auth_header, code=2) + + def upload_model(self, epoch, weights, is_best=False, map=0.0, final=False): + # Upload a model to HUB + file = None + if Path(weights).is_file(): + with open(weights, "rb") as f: + file = f.read() + if final: + smart_request(f'{self.api_url}/upload', + data={ + "epoch": epoch, + "type": "final", + "map": map}, + files={"best.pt": file}, + headers=self.auth_header, + retry=10, + timeout=3600, + code=4) + else: + smart_request(f'{self.api_url}/upload', + data={ + "epoch": epoch, + "type": "epoch", + "isBest": bool(is_best)}, + headers=self.auth_header, + files={"last.pt": file}, + code=3) + + def _get_model(self): + # Returns model from database by id + api_url = f"{HUB_API_ROOT}/v1/models/{self.model_id}" + headers = self.auth_header + + try: + r = smart_request(api_url, method="get", headers=headers, thread=False, code=0) + data = r.json().get("data", None) + if not data: + return + assert data['data'], 'ERROR: Dataset may still be processing. Please wait a minute and try again.' # RF fix + self.model_id = data["id"] + + return data + except requests.exceptions.ConnectionError as e: + raise ConnectionRefusedError('ERROR: The HUB server is not online. Please try again later.') from e + + def check_disk_space(self): + if not check_dataset_disk_space(self.model['data']): + raise MemoryError("Not enough disk space") + + # COMMENT: Should not be needed as HUB is now considered an integration and is in integrations_callbacks + # import ultralytics.yolo.utils.callbacks.hub as hub_callbacks + # @staticmethod + # def register_callbacks(trainer): + # for k, v in hub_callbacks.callbacks.items(): + # trainer.add_callback(k, v) + + @threaded + def _heartbeats(self): + while self.alive: + r = smart_request(f'{HUB_API_ROOT}/v1/agent/heartbeat/models/{self.model_id}', + json={ + "agent": AGENT_NAME, + "agentId": self.agent_id}, + headers=self.auth_header, + retry=0, + code=5, + thread=False) + self.agent_id = r.json().get('data', {}).get('agentId', None) + sleep(self.rate_limits['heartbeat']) diff --git a/ultralytics/hub/utils.py b/ultralytics/hub/utils.py new file mode 100644 index 0000000000..bc6dd69c68 --- /dev/null +++ b/ultralytics/hub/utils.py @@ -0,0 +1,139 @@ +import shutil +import threading +import time +import uuid + +import requests + +from ultralytics.hub.config import HUB_API_ROOT +from ultralytics.yolo.utils import LOGGER, RANK, SETTINGS, colorstr, emojis + +PREFIX = colorstr('Ultralytics: ') +HELP_MSG = 'If this issue persists please visit https://github.com/ultralytics/hub/issues for assistance.' + + +def check_dataset_disk_space(url='https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip', sf=2.0): + # Check that url fits on disk with safety factor sf, i.e. require 2GB free if url size is 1GB with sf=2.0 + gib = 1 << 30 # bytes per GiB + data = int(requests.head(url).headers['Content-Length']) / gib # dataset size (GB) + total, used, free = (x / gib for x in shutil.disk_usage("/")) # bytes + LOGGER.info(f'{PREFIX}{data:.3f} GB dataset, {free:.1f}/{total:.1f} GB free disk space') + if data * sf < free: + return True # sufficient space + LOGGER.warning(f'{PREFIX}WARNING: Insufficient free disk space {free:.1f} GB < {data * sf:.3f} GB required, ' + f'training cancelled ❌. Please free {data * sf - free:.1f} GB additional disk space and try again.') + return False # insufficient space + + +def request_with_credentials(url: str) -> any: + """ Make a ajax request with cookies attached """ + from google.colab import output # noqa + from IPython import display # noqa + display.display( + display.Javascript(""" + window._hub_tmp = new Promise((resolve, reject) => { + const timeout = setTimeout(() => reject("Failed authenticating existing browser session"), 5000) + fetch("%s", { + method: 'POST', + credentials: 'include' + }) + .then((response) => resolve(response.json())) + .then((json) => { + clearTimeout(timeout); + }).catch((err) => { + clearTimeout(timeout); + reject(err); + }); + }); + """ % url)) + return output.eval_js("_hub_tmp") + + +# Deprecated TODO: eliminate this function? +def split_key(key: str = '') -> tuple[str, str]: + """ + Verify and split a 'api_key[sep]model_id' string, sep is one of '.' or '_' + + Args: + key (str): The model key to split. If not provided, the user will be prompted to enter it. + + Returns: + Tuple[str, str]: A tuple containing the API key and model ID. + """ + + import getpass + + error_string = emojis(f'{PREFIX}Invalid API key ⚠️\n') # error string + if not key: + key = getpass.getpass('Enter model key: ') + sep = '_' if '_' in key else '.' if '.' in key else None # separator + assert sep, error_string + api_key, model_id = key.split(sep) + assert len(api_key) and len(model_id), error_string + return api_key, model_id + + +def smart_request(*args, retry=3, timeout=30, thread=True, code=-1, method="post", **kwargs): + """ + Makes an HTTP request using the 'requests' library, with exponential backoff retries up to a specified timeout. + + Args: + *args: Positional arguments to be passed to the requests function specified in method. + retry (int, optional): Number of retries to attempt before giving up. Default is 3. + timeout (int, optional): Timeout in seconds after which the function will give up retrying. Default is 30. + thread (bool, optional): Whether to execute the request in a separate daemon thread. Default is True. + code (int, optional): An identifier for the request, used for logging purposes. Default is -1. + method (str, optional): The HTTP method to use for the request. Choices are 'post' and 'get'. Default is 'post'. + **kwargs: Keyword arguments to be passed to the requests function specified in method. + + Returns: + requests.Response: The HTTP response object. If the request is executed in a separate thread, returns None. + """ + retry_codes = (408, 500) # retry only these codes + methods = {'post': requests.post, 'get': requests.get} # request methods + + def fcn(*args, **kwargs): + t0 = time.time() + for i in range(retry + 1): + if (time.time() - t0) > timeout: + break + r = methods[method](*args, **kwargs) # i.e. post(url, data, json, files) + if r.status_code == 200: + break + try: + m = r.json().get('message', 'No JSON message.') + except Exception: + m = 'Unable to read JSON.' + if i == 0: + if r.status_code in retry_codes: + m += f' Retrying {retry}x for {timeout}s.' if retry else '' + elif r.status_code == 429: # rate limit + h = r.headers # response headers + m = f"Rate limit reached ({h['X-RateLimit-Remaining']}/{h['X-RateLimit-Limit']}). " \ + f"Please retry after {h['Retry-After']}s." + LOGGER.warning(f"{PREFIX}{m} {HELP_MSG} ({r.status_code} #{code})") + if r.status_code not in retry_codes: + return r + time.sleep(2 ** i) # exponential standoff + return r + + if thread: + threading.Thread(target=fcn, args=args, kwargs=kwargs, daemon=True).start() + else: + return fcn(*args, **kwargs) + + +def sync_analytics(cfg, enabled=False): + """ + Sync analytics data if enabled in the global settings + + Args: + cfg (DictConfig): Configuration for the task and mode. + enabled (bool): For debugging. + """ + if SETTINGS['sync'] and RANK in {-1, 0} and enabled: + cfg = dict(cfg) # convert type from DictConfig to dict + cfg['uuid'] = uuid.getnode() # add the device UUID to the configuration data + + # Send a request to the HUB API to sync the analytics data + smart_request(f'{HUB_API_ROOT}/analytics', data=cfg, headers=None, code=3, retry=0) diff --git a/ultralytics/yolo/cli.py b/ultralytics/yolo/cli.py index 4f883a5e3b..996d4cc278 100644 --- a/ultralytics/yolo/cli.py +++ b/ultralytics/yolo/cli.py @@ -3,46 +3,48 @@ from pathlib import Path import hydra -import ultralytics -from ultralytics import yolo +from ultralytics import hub, yolo +from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, colorstr -from .utils import DEFAULT_CONFIG, LOGGER, colorstr +DIR = Path(__file__).parent -@hydra.main(version_base=None, config_path="configs", config_name="default") +@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent.relative_to(DIR)), config_name=DEFAULT_CONFIG.name) def cli(cfg): - cwd = Path().cwd() - LOGGER.info(f"{colorstr(f'Ultralytics YOLO v{ultralytics.__version__}')}") + """ + Run a specified task and mode with the given configuration. + + Args: + cfg (DictConfig): Configuration for the task and mode. + """ + # LOGGER.info(f"{colorstr(f'Ultralytics YOLO v{ultralytics.__version__}')}") task, mode = cfg.task.lower(), cfg.mode.lower() - if task == "init": # special case - shutil.copy2(DEFAULT_CONFIG, cwd) + # Special case for initializing the configuration + if task == "init": + shutil.copy2(DEFAULT_CONFIG, Path.cwd()) LOGGER.info(f""" - {colorstr("YOLO:")} configuration saved to {cwd / DEFAULT_CONFIG.name}. + {colorstr("YOLO:")} configuration saved to {Path.cwd() / DEFAULT_CONFIG.name}. To run experiments using custom configuration: yolo task='task' mode='mode' --config-name config_file.yaml """) return - elif task == "detect": - module = yolo.v8.detect - elif task == "segment": - module = yolo.v8.segment - elif task == "classify": - module = yolo.v8.classify - elif task == "export": - func = yolo.engine.exporter.export - else: - raise SyntaxError("task not recognized. Choices are `'detect', 'segment', 'classify'`") - - if mode == "train": - func = module.train - elif mode == "val": - func = module.val - elif mode == "predict": - func = module.predict - elif mode == "export": - func = yolo.engine.exporter.export - else: - raise SyntaxError("mode not recognized. Choices are `'train', 'val', 'predict', 'export'`") + # Mapping from task to module + task_module_map = {"detect": yolo.v8.detect, "segment": yolo.v8.segment, "classify": yolo.v8.classify} + module = task_module_map.get(task) + if not module: + raise SyntaxError(f"task not recognized. Choices are {', '.join(task_module_map.keys())}") + + # Mapping from mode to function + mode_func_map = { + "train": module.train, + "val": module.val, + "predict": module.predict, + "export": yolo.engine.exporter.export, + "checks": hub.checks} + func = mode_func_map.get(mode) + if not func: + raise SyntaxError(f"mode not recognized. Choices are {', '.join(mode_func_map.keys())}") + func(cfg) diff --git a/ultralytics/yolo/configs/default.yaml b/ultralytics/yolo/configs/default.yaml index 96b9c49267..47d165920d 100644 --- a/ultralytics/yolo/configs/default.yaml +++ b/ultralytics/yolo/configs/default.yaml @@ -8,6 +8,7 @@ mode: "train" # choices=['train', 'val', 'predict'] # mode to run task in. model: null # i.e. yolov5s.pt, yolo.yaml. Path to model file data: null # i.e. coco128.yaml. Path to data file epochs: 100 # number of epochs to train for +patience: 50 # TODO: epochs to wait for no observable improvement for early stopping of training batch_size: 16 # number of images per batch imgsz: 640 # size of input images save: True # save checkpoints diff --git a/ultralytics/yolo/engine/exporter.py b/ultralytics/yolo/engine/exporter.py index efd12521c0..71e9cb8fba 100644 --- a/ultralytics/yolo/engine/exporter.py +++ b/ultralytics/yolo/engine/exporter.py @@ -71,8 +71,7 @@ from ultralytics.nn.tasks import ClassificationModel, DetectionModel, Segmentati from ultralytics.yolo.configs import get_config from ultralytics.yolo.data.dataloaders.stream_loaders import LoadImages from ultralytics.yolo.data.utils import check_dataset -from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, colorstr, get_default_args, yaml_save -from ultralytics.yolo.utils.callbacks import default_callbacks +from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, callbacks, colorstr, get_default_args, yaml_save from ultralytics.yolo.utils.checks import check_imgsz, check_requirements, check_version, check_yaml from ultralytics.yolo.utils.files import file_size, increment_path from ultralytics.yolo.utils.ops import Profile @@ -138,16 +137,15 @@ class Exporter: """ if overrides is None: overrides = {} + if 'batch_size' not in overrides: + overrides['batch_size'] = 1 # set default export batch size self.args = get_config(config, overrides) project = self.args.project or f"runs/{self.args.task}" name = self.args.name or "exp" # hardcode mode as export doesn't require it self.save_dir = increment_path(Path(project) / name, exist_ok=self.args.exist_ok) self.save_dir.mkdir(parents=True, exist_ok=True) - - # callbacks - self.callbacks = defaultdict([]) - for callback, func in default_callbacks.items(): - self.add_callback(callback, func) + self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()}) # add callbacks + callbacks.add_integration_callbacks(self) @smart_inference_mode() def __call__(self, model=None): @@ -173,7 +171,6 @@ class Exporter: assert self.device.type == 'cpu', '--optimize not compatible with cuda devices, i.e. use --device cpu' # Input - self.args.batch_size = 1 # TODO: resolve this issue, default 16 not fit for export im = torch.zeros(self.args.batch_size, 3, *self.imgsz).to(self.device) file = Path(getattr(model, 'yaml_file', None) or Path(model.yaml['yaml_file']).name) @@ -765,18 +762,6 @@ class Exporter: LOGGER.info(f'{prefix} pipeline success') return model - def add_callback(self, event: str, callback): - """ - appends the given callback - """ - self.callbacks[event].append(callback) - - def set_callback(self, event: str, callback): - """ - overrides the existing callbacks with the given callback - """ - self.callbacks[event] = [callback] - def run_callbacks(self, event: str): for callback in self.callbacks.get(event, []): callback(self) diff --git a/ultralytics/yolo/engine/predictor.py b/ultralytics/yolo/engine/predictor.py index 8dd685127c..42130e9408 100644 --- a/ultralytics/yolo/engine/predictor.py +++ b/ultralytics/yolo/engine/predictor.py @@ -35,8 +35,7 @@ from ultralytics.nn.autobackend import AutoBackend from ultralytics.yolo.configs import get_config from ultralytics.yolo.data.dataloaders.stream_loaders import LoadImages, LoadScreenshots, LoadStreams from ultralytics.yolo.data.utils import IMG_FORMATS, VID_FORMATS -from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, colorstr, ops -from ultralytics.yolo.utils.callbacks import default_callbacks +from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, callbacks, colorstr, ops from ultralytics.yolo.utils.checks import check_file, check_imgsz, check_imshow from ultralytics.yolo.utils.files import increment_path from ultralytics.yolo.utils.torch_utils import select_device, smart_inference_mode @@ -90,11 +89,8 @@ class BasePredictor: self.view_img = None self.annotator = None self.data_path = None - - # callbacks - self.callbacks = defaultdict([]) - for callback, func in default_callbacks.items(): - self.add_callback(callback, func) + self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()}) # add callbacks + callbacks.add_integration_callbacks(self) def preprocess(self, img): pass @@ -227,18 +223,6 @@ class BasePredictor: self.vid_writer[idx] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) self.vid_writer[idx].write(im0) - def add_callback(self, event: str, callback): - """ - appends the given callback - """ - self.callbacks[event].append(callback) - - def set_callback(self, event: str, callback): - """ - overrides the existing callbacks with the given callback - """ - self.callbacks[event] = [callback] - def run_callbacks(self, event: str): for callback in self.callbacks.get(event, []): callback(self) diff --git a/ultralytics/yolo/engine/trainer.py b/ultralytics/yolo/engine/trainer.py index 4c7915d120..ba144a9ebf 100644 --- a/ultralytics/yolo/engine/trainer.py +++ b/ultralytics/yolo/engine/trainer.py @@ -21,11 +21,10 @@ from torch.optim import lr_scheduler from tqdm import tqdm import ultralytics.yolo.utils as utils -import ultralytics.yolo.utils.callbacks as callbacks from ultralytics import __version__ from ultralytics.yolo.configs import get_config from ultralytics.yolo.data.utils import check_dataset, check_dataset_yaml -from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, RANK, TQDM_BAR_FORMAT, colorstr, yaml_save +from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, RANK, TQDM_BAR_FORMAT, callbacks, colorstr, yaml_save from ultralytics.yolo.utils.checks import check_file, print_args from ultralytics.yolo.utils.dist import ddp_cleanup, generate_ddp_command from ultralytics.yolo.utils.files import get_latest_run, increment_path @@ -88,7 +87,7 @@ class BaseTrainer: self.model = None self.callbacks = defaultdict(list) - # dirs + # Dirs project = self.args.project or f"runs/{self.args.task}" name = self.args.name or f"{self.args.mode}" self.save_dir = increment_path(Path(project) / name, exist_ok=self.args.exist_ok if RANK in {-1, 0} else True) @@ -104,7 +103,7 @@ class BaseTrainer: if RANK == -1: print_args(dict(self.args)) - # device + # Device self.device = utils.torch_utils.select_device(self.args.device, self.batch_size) self.amp = self.device.type != 'cpu' self.scaler = amp.GradScaler(enabled=self.amp) @@ -123,7 +122,7 @@ class BaseTrainer: self.lf = None self.scheduler = None - # epoch level metrics + # Epoch level metrics self.best_fitness = None self.fitness = None self.loss = None @@ -131,20 +130,20 @@ class BaseTrainer: self.loss_names = None self.csv = self.save_dir / 'results.csv' - for callback, func in callbacks.default_callbacks.items(): - self.add_callback(callback, func) + # Callbacks + self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()}) # add callbacks if RANK in {0, -1}: callbacks.add_integration_callbacks(self) def add_callback(self, event: str, callback): """ - appends the given callback + Appends the given callback. TODO: unused, consider removing """ self.callbacks[event].append(callback) def set_callback(self, event: str, callback): """ - overrides the existing callbacks with the given callback + Overrides the existing callbacks with the given callback. TODO: unused, consider removing """ self.callbacks[event] = [callback] @@ -469,7 +468,7 @@ class BaseTrainer: self.validator.args.save_json = True self.metrics = self.validator(model=f) self.metrics.pop('fitness', None) - self.run_callbacks('on_val_end') + self.run_callbacks('on_fit_epoch_end') def check_resume(self): resume = self.args.resume diff --git a/ultralytics/yolo/engine/validator.py b/ultralytics/yolo/engine/validator.py index dde017b14d..d4f36b5c52 100644 --- a/ultralytics/yolo/engine/validator.py +++ b/ultralytics/yolo/engine/validator.py @@ -8,8 +8,7 @@ from tqdm import tqdm from ultralytics.nn.autobackend import AutoBackend from ultralytics.yolo.data.utils import check_dataset, check_dataset_yaml -from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, RANK, TQDM_BAR_FORMAT -from ultralytics.yolo.utils.callbacks import default_callbacks +from ultralytics.yolo.utils import DEFAULT_CONFIG, LOGGER, RANK, TQDM_BAR_FORMAT, callbacks from ultralytics.yolo.utils.checks import check_imgsz from ultralytics.yolo.utils.files import increment_path from ultralytics.yolo.utils.ops import Profile @@ -66,10 +65,7 @@ class BaseValidator: exist_ok=self.args.exist_ok if RANK in {-1, 0} else True) (self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) - # callbacks - self.callbacks = defaultdict(list) - for callback, func in default_callbacks.items(): - self.add_callback(callback, func) + self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()}) # add callbacks @smart_inference_mode() def __call__(self, trainer=None, model=None): @@ -77,7 +73,6 @@ class BaseValidator: Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer gets priority). """ - self.run_callbacks('on_val_start') self.training = trainer is not None if self.training: self.device = trainer.device @@ -89,6 +84,8 @@ class BaseValidator: self.loss = torch.zeros_like(trainer.loss_items, device=trainer.device) self.args.plots = trainer.epoch == trainer.epochs - 1 # always plot final epoch else: + callbacks.add_integration_callbacks(self) + self.run_callbacks('on_val_start') assert model is not None, "Either trainer or model is needed for validation" self.device = select_device(self.args.device, self.args.batch_size) self.args.half &= self.device.type != 'cpu' @@ -167,18 +164,6 @@ class BaseValidator: stats = self.eval_json(stats) # update stats return stats - def add_callback(self, event: str, callback): - """ - appends the given callback - """ - self.callbacks[event].append(callback) - - def set_callback(self, event: str, callback): - """ - overrides the existing callbacks with the given callback - """ - self.callbacks[event] = [callback] - def run_callbacks(self, event: str): for callback in self.callbacks.get(event, []): callback(self) diff --git a/ultralytics/yolo/utils/__init__.py b/ultralytics/yolo/utils/__init__.py index fb6bb36d85..9e6fd3e8b1 100644 --- a/ultralytics/yolo/utils/__init__.py +++ b/ultralytics/yolo/utils/__init__.py @@ -249,26 +249,6 @@ def threaded(func): return wrapper -def get_settings(file=USER_CONFIG_DIR / 'settings.yaml'): - """ - Function that loads a global settings YAML, or creates it and populates it with default values if it does not exist. - - If the datasets or weights directories are set to None, the current working directory will be used. - The 'sync' setting determines whether analytics will be synced to help with YOLO development. - """ - from ultralytics.yolo.utils.torch_utils import torch_distributed_zero_first - - with torch_distributed_zero_first(RANK): - if not file.exists(): - settings = { - 'datasets_dir': None, # default datasets directory. If None, current working directory is used. - 'weights_dir': None, # default weights directory. If None, current working directory is used. - 'sync': True} # sync analytics to help with YOLO development - yaml_save(file, settings) - - return yaml_load(file) - - def yaml_save(file='data.yaml', data=None): """ Save YAML data to a file. @@ -305,6 +285,26 @@ def yaml_load(file='data.yaml'): return {**yaml.safe_load(f), 'yaml_file': file} +def get_settings(file=USER_CONFIG_DIR / 'settings.yaml'): + """ + Function that loads a global settings YAML, or creates it and populates it with default values if it does not exist. + + If the datasets or weights directories are set to None, the current working directory will be used. + The 'sync' setting determines whether analytics will be synced to help with YOLO development. + """ + from ultralytics.yolo.utils.torch_utils import torch_distributed_zero_first + + with torch_distributed_zero_first(RANK): + if not file.exists(): + settings = { + 'datasets_dir': None, # default datasets directory. If None, current working directory is used. + 'weights_dir': None, # default weights directory. If None, current working directory is used. + 'sync': True} # sync analytics to help with YOLO development + yaml_save(file, settings) + + return yaml_load(file) + + # Run below code on utils init ----------------------------------------------------------------------------------------- # Set logger diff --git a/ultralytics/yolo/utils/callbacks/base.py b/ultralytics/yolo/utils/callbacks/base.py index 7cdb11476c..cd12b3cdc7 100644 --- a/ultralytics/yolo/utils/callbacks/base.py +++ b/ultralytics/yolo/utils/callbacks/base.py @@ -135,11 +135,12 @@ default_callbacks = { 'on_export_end': on_export_end} -def add_integration_callbacks(trainer): +def add_integration_callbacks(instance): from .clearml import callbacks as clearml_callbacks - from .tb import callbacks as tb_callbacks + from .hub import callbacks as hub_callbacks + from .tensorboard import callbacks as tb_callbacks from .wb import callbacks as wb_callbacks - for x in clearml_callbacks, tb_callbacks, wb_callbacks: + for x in clearml_callbacks, hub_callbacks, tb_callbacks, wb_callbacks: for k, v in x.items(): - trainer.add_callback(k, v) # add_callback(name, func) + instance.callbacks[k].append(v) # callback[name].append(func) diff --git a/ultralytics/yolo/utils/callbacks/hub.py b/ultralytics/yolo/utils/callbacks/hub.py new file mode 100644 index 0000000000..a26074e6bb --- /dev/null +++ b/ultralytics/yolo/utils/callbacks/hub.py @@ -0,0 +1,80 @@ +import json +from time import time + +import torch + +from ultralytics.hub.utils import PREFIX, sync_analytics +from ultralytics.yolo.utils import LOGGER + + +def on_pretrain_routine_end(trainer): + session = getattr(trainer, 'hub_session', None) + if session: + # Start timer for upload rate limit + LOGGER.info(f"{PREFIX}View model at https://hub.ultralytics.com/models/{session.model_id} 🚀") + session.t = {'metrics': time(), 'ckpt': time()} # start timer on self.rate_limit + + +def on_fit_epoch_end(trainer): + session = getattr(trainer, 'hub_session', None) + if session: + # Upload metrics after val end + metrics = trainer.metrics + for k, v in metrics.items(): + if isinstance(v, torch.Tensor): + metrics[k] = v.item() + + session.metrics_queue[trainer.epoch] = json.dumps(metrics) # json string + if time() - session.t['metrics'] > session.rate_limits['metrics']: + session.upload_metrics() + session.t['metrics'] = time() # reset timer + session.metrics_queue = {} # reset queue + + +def on_model_save(trainer): + session = getattr(trainer, 'hub_session', None) + if session: + # Upload checkpoints with rate limiting + is_best = trainer.best_fitness == trainer.fitness + if time() - session.t['ckpt'] > session.rate_limits['ckpt']: + LOGGER.info(f"{PREFIX}Uploading checkpoint {session.model_id}") + session.upload_model(trainer.epoch, trainer.last, is_best) + session.t['ckpt'] = time() # reset timer + + +def on_train_end(trainer): + session = getattr(trainer, 'hub_session', None) + if session: + # Upload final model and metrics with exponential standoff + LOGGER.info(f"{PREFIX}Training completed successfully ✅\n" + f"{PREFIX}Uploading final {session.model_id}") + session.upload_model(trainer.epoch, trainer.best, map=trainer.metrics['metrics/mAP50(B)'], final=True) + session.alive = False # stop heartbeats + LOGGER.info(f"{PREFIX}View model at https://hub.ultralytics.com/models/{session.model_id} 🚀") + + +def on_train_start(trainer): + sync_analytics(trainer.args) + + +def on_val_start(validator): + sync_analytics(validator.args) + + +def on_predict_start(predictor): + sync_analytics(predictor.args) + + +def on_export_start(exporter): + sync_analytics(exporter.args) + + +callbacks = { + "on_pretrain_routine_end": on_pretrain_routine_end, + "on_fit_epoch_end": on_fit_epoch_end, + "on_model_save": on_model_save, + "on_train_end": on_train_end, + "on_train_start": on_train_start, + "on_val_start": on_val_start, + "on_predict_start": on_predict_start, + "on_export_start": on_export_start} diff --git a/ultralytics/yolo/utils/callbacks/tb.py b/ultralytics/yolo/utils/callbacks/tensorboard.py similarity index 100% rename from ultralytics/yolo/utils/callbacks/tb.py rename to ultralytics/yolo/utils/callbacks/tensorboard.py