From ca5e9daed1b27f6b74c86ce74f9d4432e8b51741 Mon Sep 17 00:00:00 2001
From: Mohammed Yasin <32206511+Y-T-G@users.noreply.github.com>
Date: Tue, 29 Oct 2024 20:57:07 +0800
Subject: [PATCH] Faster ONNX inference with bindings (#17184)

Co-authored-by: UltralyticsAssistant
Co-authored-by: Burhan <62214284+Burhan-Q@users.noreply.github.com>
Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com>
Co-authored-by: Glenn Jocher
---
 ultralytics/nn/autobackend.py | 42 ++++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index 9e6d38b49..b9312fefd 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -189,10 +189,32 @@ class AutoBackend(nn.Module):
             check_requirements("numpy==1.23.5")
             import onnxruntime

-            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if cuda else ["CPUExecutionProvider"]
+            providers = onnxruntime.get_available_providers()
+            if not cuda and "CUDAExecutionProvider" in providers:
+                providers.remove("CUDAExecutionProvider")
+            elif cuda and "CUDAExecutionProvider" not in providers:
+                LOGGER.warning("WARNING ⚠️ Failed to start ONNX Runtime session with CUDA. Falling back to CPU...")
+                device = torch.device("cpu")
+                cuda = False
+            LOGGER.info(f"Preferring ONNX Runtime {providers[0]}")
             session = onnxruntime.InferenceSession(w, providers=providers)
             output_names = [x.name for x in session.get_outputs()]
             metadata = session.get_modelmeta().custom_metadata_map
+            dynamic = isinstance(session.get_outputs()[0].shape[0], str)
+            if not dynamic:
+                io = session.io_binding()
+                bindings = []
+                for output in session.get_outputs():
+                    y_tensor = torch.empty(output.shape, dtype=torch.float16 if fp16 else torch.float32).to(device)
+                    io.bind_output(
+                        name=output.name,
+                        device_type=device.type,
+                        device_id=device.index if cuda else 0,
+                        element_type=np.float16 if fp16 else np.float32,
+                        shape=tuple(y_tensor.shape),
+                        buffer_ptr=y_tensor.data_ptr(),
+                    )
+                    bindings.append(y_tensor)

         # OpenVINO
         elif xml:
@@ -477,8 +499,22 @@ class AutoBackend(nn.Module):

         # ONNX Runtime
         elif self.onnx:
-            im = im.cpu().numpy()  # torch to numpy
-            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
+            if self.dynamic:
+                im = im.cpu().numpy()  # torch to numpy
+                y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
+            else:
+                if not self.cuda:
+                    im = im.cpu()
+                self.io.bind_input(
+                    name="images",
+                    device_type=im.device.type,
+                    device_id=im.device.index if im.device.type == "cuda" else 0,
+                    element_type=np.float16 if self.fp16 else np.float32,
+                    shape=tuple(im.shape),
+                    buffer_ptr=im.data_ptr(),
+                )
+                self.session.run_with_iobinding(self.io)
+                y = self.bindings

         # OpenVINO
         elif self.xml: