From 7638c5ce4daa09a77fbd731f53a77eb4076ea5d4 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 11 Mar 2024 18:50:29 +0100
Subject: [PATCH] Integrate OpenVINO `CUMULATIVE_THROUGHPUT` mode batched
 inference (#8834)

Signed-off-by: Glenn Jocher
---
 ultralytics/engine/predictor.py | 3 ++-
 ultralytics/engine/validator.py | 2 +-
 ultralytics/nn/autobackend.py   | 8 ++++++--
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py
index 24babc73b..a9244fe73 100644
--- a/ultralytics/engine/predictor.py
+++ b/ultralytics/engine/predictor.py
@@ -294,11 +294,12 @@ class BasePredictor:
     def setup_model(self, model, verbose=True):
         """Initialize YOLO model with given parameters and set it to evaluation mode."""
         self.model = AutoBackend(
-            model or self.args.model,
+            weights=model or self.args.model,
             device=select_device(self.args.device, verbose=verbose),
             dnn=self.args.dnn,
             data=self.args.data,
             fp16=self.args.half,
+            batch=self.args.batch,
             fuse=True,
             verbose=verbose,
         )
diff --git a/ultralytics/engine/validator.py b/ultralytics/engine/validator.py
index 41be54c19..17666e385 100644
--- a/ultralytics/engine/validator.py
+++ b/ultralytics/engine/validator.py
@@ -122,7 +122,7 @@ class BaseValidator:
         else:
             callbacks.add_integration_callbacks(self)
             model = AutoBackend(
-                model or self.args.model,
+                weights=model or self.args.model,
                 device=select_device(self.args.device, self.args.batch),
                 dnn=self.args.dnn,
                 data=self.args.data,
diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index 95568ad15..bebfdbe02 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -86,6 +86,7 @@ class AutoBackend(nn.Module):
         dnn=False,
         data=None,
         fp16=False,
+        batch=1,
         fuse=True,
         verbose=True,
     ):
@@ -98,6 +99,7 @@
             dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False.
             data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional.
             fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False.
+            batch (int): Batch-size to assume for inference.
             fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True.
             verbose (bool): Enable verbose logging. Defaults to True.
         """
@@ -204,7 +206,9 @@
             if batch_dim.is_static:
                 batch_size = batch_dim.get_length()
 
-            inference_mode = "LATENCY"  # either 'LATENCY', 'THROUGHPUT' (not recommended), or 'CUMULATIVE_THROUGHPUT'
+            # OpenVINO inference modes are 'LATENCY', 'THROUGHPUT' (not recommended), or 'CUMULATIVE_THROUGHPUT'
+            inference_mode = "CUMULATIVE_THROUGHPUT" if batch > 1 else "LATENCY"
+            LOGGER.info(f"Using OpenVINO {inference_mode} mode for batch-size={batch_size} inference...")
             ov_compiled_model = core.compile_model(
                 ov_model,
                 device_name="AUTO",  # AUTO selects best available device, do not modify
@@ -454,7 +458,7 @@
                     # Start async inference with userdata=i to specify the position in results list
                     async_queue.start_async(inputs={self.input_name: im[i : i + 1]}, userdata=i)  # keep image as BCHW
                 async_queue.wait_all()  # wait for all inference requests to complete
-                y = [list(r.values()) for r in results][0]
+                y = np.concatenate([list(r.values())[0] for r in results])
 
             else:  # inference_mode = "LATENCY", optimized for fastest first result at batch-size 1
                 y = list(self.ov_compiled_model(im).values())
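
For reference, the async batched-inference pattern this patch switches on for batch > 1 can be
exercised standalone. Below is a minimal sketch, assuming openvino>=2023.1 (where
`ov.AsyncInferQueue` is exported at the package top level); the "model.xml" path and the
640x640 input shape are illustrative placeholders, not values mandated by the patch:

    import numpy as np
    import openvino as ov

    core = ov.Core()
    ov_model = core.read_model("model.xml")  # placeholder IR path
    # AUTO + CUMULATIVE_THROUGHPUT lets OpenVINO spread requests across all available devices
    compiled = core.compile_model(
        ov_model,
        device_name="AUTO",
        config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
    )

    im = np.zeros((4, 3, 640, 640), dtype=np.float32)  # dummy BCHW batch of 4
    n = im.shape[0]
    results = [None] * n  # filled by callbacks, indexed by userdata

    def callback(request, userdata):
        """Store each finished request's outputs at its original batch position."""
        results[userdata] = request.results

    async_queue = ov.AsyncInferQueue(compiled)
    async_queue.set_callback(callback)
    for i in range(n):
        async_queue.start_async({0: im[i : i + 1]}, userdata=i)  # one image per request
    async_queue.wait_all()  # block until every request has completed
    y = np.concatenate([list(r.values())[0] for r in results])  # reassemble in batch order

Each request carries a single image, so a dynamic-batch IR export is not required; the
`userdata` index preserves ordering because callbacks may complete out of order, which is
exactly why the patched forward path concatenates per-request outputs rather than taking
`results[0]`.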