From 25307552100e4c03c8fec7b0f7286b4244018e15 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sat, 28 Sep 2024 20:31:21 +0200 Subject: [PATCH] `ultralytics 8.2.103` Windows Benchmarks CI (#16523) Signed-off-by: UltralyticsAssistant Co-authored-by: UltralyticsAssistant --- .github/workflows/ci.yaml | 2 +- ultralytics/__init__.py | 2 +- ultralytics/utils/benchmarks.py | 169 +++++++++++++++++++------------- 3 files changed, 105 insertions(+), 68 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 5501283260..0ba1cee566 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -98,7 +98,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-14] + os: [ubuntu-latest, windows-latest, macos-14] python-version: ["3.11"] model: [yolov8n] steps: diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index 75f5097804..a91448fc9e 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = "8.2.102" +__version__ = "8.2.103" import os diff --git a/ultralytics/utils/benchmarks.py b/ultralytics/utils/benchmarks.py index 53ad62c5fe..b28ebee542 100644 --- a/ultralytics/utils/benchmarks.py +++ b/ultralytics/utils/benchmarks.py @@ -43,36 +43,40 @@ from ultralytics.utils import ARM64, ASSETS, IS_JETSON, IS_RASPBERRYPI, LINUX, L from ultralytics.utils.checks import IS_PYTHON_3_12, check_requirements, check_yolo from ultralytics.utils.downloads import safe_download from ultralytics.utils.files import file_size -from ultralytics.utils.torch_utils import select_device +from ultralytics.utils.torch_utils import get_cpu_info, select_device def benchmark( - model=WEIGHTS_DIR / "yolov8n.pt", data=None, imgsz=160, half=False, int8=False, device="cpu", verbose=False + model=WEIGHTS_DIR / "yolov8n.pt", + data=None, + imgsz=160, + half=False, + int8=False, + device="cpu", + verbose=False, + eps=1e-3, ): """ Benchmark a YOLO model across different formats for speed and accuracy. Args: - model (str | Path | optional): Path to the model file or directory. Default is - Path(SETTINGS['weights_dir']) / 'yolov8n.pt'. - data (str, optional): Dataset to evaluate on, inherited from TASK2DATA if not passed. Default is None. - imgsz (int, optional): Image size for the benchmark. Default is 160. - half (bool, optional): Use half-precision for the model if True. Default is False. - int8 (bool, optional): Use int8-precision for the model if True. Default is False. - device (str, optional): Device to run the benchmark on, either 'cpu' or 'cuda'. Default is 'cpu'. - verbose (bool | float | optional): If True or a float, assert benchmarks pass with given metric. - Default is False. + model (str | Path): Path to the model file or directory. + data (str | None): Dataset to evaluate on, inherited from TASK2DATA if not passed. + imgsz (int): Image size for the benchmark. + half (bool): Use half-precision for the model if True. + int8 (bool): Use int8-precision for the model if True. + device (str): Device to run the benchmark on, either 'cpu' or 'cuda'. + verbose (bool | float): If True or a float, assert benchmarks pass with given metric. + eps (float): Epsilon value for divide by zero prevention. Returns: - df (pandas.DataFrame): A pandas DataFrame with benchmark results for each format, including file size, - metric, and inference time. + (pandas.DataFrame): A pandas DataFrame with benchmark results for each format, including file size, metric, + and inference time. - Example: - ```python - from ultralytics.utils.benchmarks import benchmark - - benchmark(model="yolov8n.pt", imgsz=640) - ``` + Examples: + Benchmark a YOLO model with default settings: + >>> from ultralytics.utils.benchmarks import benchmark + >>> benchmark(model="yolov8n.pt", imgsz=640) """ import pandas as pd # scope for faster 'import ultralytics' @@ -106,6 +110,7 @@ def benchmark( if i in {11}: # Paddle assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet" assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet" + assert LINUX or MACOS, "Windows Paddle exports not supported yet" if i in {12}: # NCNN assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet" if "cpu" in device.type: @@ -138,7 +143,7 @@ def benchmark( data=data, batch=1, imgsz=imgsz, plots=False, device=device, half=half, int8=int8, verbose=False ) metric, speed = results.results_dict[key], results.speed["inference"] - fps = round((1000 / speed), 2) # frames per second + fps = round(1000 / (speed + eps), 2) # frames per second y.append([name, "✅", round(file_size(filename), 1), round(metric, 4), round(speed, 2), fps]) except Exception as e: if verbose: @@ -165,10 +170,10 @@ def benchmark( class RF100Benchmark: - """Benchmark YOLO model performance across formats for speed and accuracy.""" + """Benchmark YOLO model performance across various formats for speed and accuracy.""" def __init__(self): - """Function for initialization of RF100Benchmark.""" + """Initialize the RF100Benchmark class for benchmarking YOLO model performance across various formats.""" self.ds_names = [] self.ds_cfg_list = [] self.rf = None @@ -180,6 +185,11 @@ class RF100Benchmark: Args: api_key (str): The API key. + + Examples: + Set the Roboflow API key for accessing datasets: + >>> benchmark = RF100Benchmark() + >>> benchmark.set_key("your_roboflow_api_key") """ check_requirements("roboflow") from roboflow import Roboflow @@ -188,10 +198,15 @@ class RF100Benchmark: def parse_dataset(self, ds_link_txt="datasets_links.txt"): """ - Parse dataset links and downloads datasets. + Parse dataset links and download datasets. Args: - ds_link_txt (str): Path to dataset_links file. + ds_link_txt (str): Path to the file containing dataset links. + + Examples: + >>> benchmark = RF100Benchmark() + >>> benchmark.set_key("api_key") + >>> benchmark.parse_dataset("datasets_links.txt") """ (shutil.rmtree("rf-100"), os.mkdir("rf-100")) if os.path.exists("rf-100") else os.mkdir("rf-100") os.chdir("rf-100") @@ -217,10 +232,13 @@ class RF100Benchmark: @staticmethod def fix_yaml(path): """ - Function to fix YAML train and val path. + Fixes the train and validation paths in a given YAML file. Args: - path (str): YAML file path. + path (str): Path to the YAML file to be fixed. + + Examples: + >>> RF100Benchmark.fix_yaml("path/to/data.yaml") """ with open(path) as file: yaml_data = yaml.safe_load(file) @@ -231,13 +249,21 @@ class RF100Benchmark: def evaluate(self, yaml_path, val_log_file, eval_log_file, list_ind): """ - Model evaluation on validation results. + Evaluate model performance on validation results. Args: - yaml_path (str): YAML file path. - val_log_file (str): val_log_file path. - eval_log_file (str): eval_log_file path. - list_ind (int): Index for current dataset. + yaml_path (str): Path to the YAML configuration file. + val_log_file (str): Path to the validation log file. + eval_log_file (str): Path to the evaluation log file. + list_ind (int): Index of the current dataset in the list. + + Returns: + (float): The mean average precision (mAP) value for the evaluated model. + + Examples: + Evaluate a model on a specific dataset + >>> benchmark = RF100Benchmark() + >>> benchmark.evaluate("path/to/data.yaml", "path/to/val_log.txt", "path/to/eval_log.txt", 0) """ skip_symbols = ["🚀", "⚠️", "💡", "❌"] with open(yaml_path) as stream: @@ -285,21 +311,23 @@ class ProfileModels: This class profiles the performance of different models, returning results such as model speed and FLOPs. Attributes: - paths (list): Paths of the models to profile. - num_timed_runs (int): Number of timed runs for the profiling. Default is 100. - num_warmup_runs (int): Number of warmup runs before profiling. Default is 10. - min_time (float): Minimum number of seconds to profile for. Default is 60. - imgsz (int): Image size used in the models. Default is 640. + paths (List[str]): Paths of the models to profile. + num_timed_runs (int): Number of timed runs for the profiling. + num_warmup_runs (int): Number of warmup runs before profiling. + min_time (float): Minimum number of seconds to profile for. + imgsz (int): Image size used in the models. + half (bool): Flag to indicate whether to use FP16 half-precision for TensorRT profiling. + trt (bool): Flag to indicate whether to profile using TensorRT. + device (torch.device): Device used for profiling. Methods: - profile(): Profiles the models and prints the result. + profile: Profiles the models and prints the result. - Example: - ```python - from ultralytics.utils.benchmarks import ProfileModels - - ProfileModels(["yolov8n.yaml", "yolov8s.yaml"], imgsz=640).profile() - ``` + Examples: + Profile models and print results + >>> from ultralytics.utils.benchmarks import ProfileModels + >>> profiler = ProfileModels(["yolov8n.yaml", "yolov8s.yaml"], imgsz=640) + >>> profiler.profile() """ def __init__( @@ -317,17 +345,23 @@ class ProfileModels: Initialize the ProfileModels class for profiling models. Args: - paths (list): List of paths of the models to be profiled. - num_timed_runs (int, optional): Number of timed runs for the profiling. Default is 100. - num_warmup_runs (int, optional): Number of warmup runs before the actual profiling starts. Default is 10. - min_time (float, optional): Minimum time in seconds for profiling a model. Default is 60. - imgsz (int, optional): Size of the image used during profiling. Default is 640. - half (bool, optional): Flag to indicate whether to use FP16 half-precision for TensorRT profiling. - trt (bool, optional): Flag to indicate whether to profile using TensorRT. Default is True. - device (torch.device, optional): Device used for profiling. If None, it is determined automatically. + paths (List[str]): List of paths of the models to be profiled. + num_timed_runs (int): Number of timed runs for the profiling. + num_warmup_runs (int): Number of warmup runs before the actual profiling starts. + min_time (float): Minimum time in seconds for profiling a model. + imgsz (int): Size of the image used during profiling. + half (bool): Flag to indicate whether to use FP16 half-precision for TensorRT profiling. + trt (bool): Flag to indicate whether to profile using TensorRT. + device (torch.device | None): Device used for profiling. If None, it is determined automatically. Notes: - FP16 'half' argument option removed for ONNX as slower on CPU than FP32 + FP16 'half' argument option removed for ONNX as slower on CPU than FP32. + + Examples: + Initialize and profile models + >>> from ultralytics.utils.benchmarks import ProfileModels + >>> profiler = ProfileModels(["yolov8n.yaml", "yolov8s.yaml"], imgsz=640) + >>> profiler.profile() """ self.paths = paths self.num_timed_runs = num_timed_runs @@ -339,7 +373,7 @@ class ProfileModels: self.device = device or torch.device(0 if torch.cuda.is_available() else "cpu") def profile(self): - """Logs the benchmarking results of a model, checks metrics against floor and returns the results.""" + """Profiles YOLO models for speed and accuracy across various formats including ONNX and TensorRT.""" files = self.get_files() if not files: @@ -404,7 +438,7 @@ class ProfileModels: @staticmethod def iterative_sigma_clipping(data, sigma=2, max_iters=3): - """Applies an iterative sigma clipping algorithm to the given data times number of iterations.""" + """Applies iterative sigma clipping to data to remove outliers based on specified sigma and iteration count.""" data = np.array(data) for _ in range(max_iters): mean, std = np.mean(data), np.std(data) @@ -415,7 +449,7 @@ class ProfileModels: return data def profile_tensorrt_model(self, engine_file: str, eps: float = 1e-3): - """Profiles the TensorRT model, measuring average run time and standard deviation among runs.""" + """Profiles YOLO model performance with TensorRT, measuring average run time and standard deviation.""" if not self.trt or not Path(engine_file).is_file(): return 0.0, 0.0 @@ -499,7 +533,7 @@ class ProfileModels: return np.mean(run_times), np.std(run_times) def generate_table_row(self, model_name, t_onnx, t_engine, model_info): - """Generates a formatted string for a table row that includes model performance and metric details.""" + """Generates a table row string with model performance metrics including inference times and model details.""" layers, params, gradients, flops = model_info return ( f"| {model_name:18s} | {self.imgsz} | - | {t_onnx[0]:.2f} ± {t_onnx[1]:.2f} ms | {t_engine[0]:.2f} ± " @@ -508,7 +542,7 @@ class ProfileModels: @staticmethod def generate_results_dict(model_name, t_onnx, t_engine, model_info): - """Generates a dictionary of model details including name, parameters, GFLOPS and speed metrics.""" + """Generates a dictionary of profiling results including model name, parameters, GFLOPs, and speed metrics.""" layers, params, gradients, flops = model_info return { "model/name": model_name, @@ -520,16 +554,19 @@ class ProfileModels: @staticmethod def print_table(table_rows): - """Formats and prints a comparison table for different models with given statistics and performance data.""" + """Prints a formatted table of model profiling results, including speed and accuracy metrics.""" gpu = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "GPU" - header = ( - f"| Model | size
(pixels) | mAPval
50-95 | Speed
CPU ONNX
(ms) | " - f"Speed
{gpu} TensorRT
(ms) | params
(M) | FLOPs
(B) |" - ) - separator = ( - "|-------------|---------------------|--------------------|------------------------------|" - "-----------------------------------|------------------|-----------------|" - ) + headers = [ + "Model", + "size
(pixels)", + "mAPval
50-95", + f"Speed
CPU ({get_cpu_info()}) ONNX
(ms)", + f"Speed
{gpu} TensorRT
(ms)", + "params
(M)", + "FLOPs
(B)", + ] + header = "|" + "|".join(f" {h} " for h in headers) + "|" + separator = "|" + "|".join("-" * (len(h) + 2) for h in headers) + "|" print(f"\n\n{header}") print(separator)