From 8fd9a1a048b83975566ed82de9d481e015c72ad7 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Sep 2023 22:00:04 +0200 Subject: [PATCH] `ultralytics 8.0.172` faster `LetterBox()` and Classify Tune fix (#4766) Co-authored-by: BardJun <70683507+jy1002@users.noreply.github.com> --- docs/guides/sahi-tiled-inference.md | 64 ++++++++++++++++++++----- tests/test_cuda.py | 3 +- ultralytics/__init__.py | 2 +- ultralytics/engine/predictor.py | 4 +- ultralytics/models/rtdetr/predict.py | 3 +- ultralytics/models/sam/predict.py | 13 ++--- ultralytics/models/yolo/classify/val.py | 1 + ultralytics/utils/callbacks/base.py | 2 +- 8 files changed, 67 insertions(+), 25 deletions(-) diff --git a/docs/guides/sahi-tiled-inference.md b/docs/guides/sahi-tiled-inference.md index a4f2f5a0..abed5bde 100644 --- a/docs/guides/sahi-tiled-inference.md +++ b/docs/guides/sahi-tiled-inference.md @@ -6,27 +6,44 @@ keywords: YOLOv8, SAHI, Sliced Inference, Object Detection, Ultralytics, Large S # Ultralytics Docs: Using YOLOv8 with SAHI for Sliced Inference -Welcome to the Ultralytics documentation on how to use YOLOv8 with SAHI (Slicing Aided Hyper Inference). In this comprehensive guide, we'll discuss what SAHI is, the benefits of sliced inference, and how to use SAHI with YOLOv8 for object detection tasks. +Welcome to the Ultralytics documentation on how to use YOLOv8 with [SAHI](https://github.com/obss/sahi) (Slicing Aided Hyper Inference). This comprehensive guide aims to furnish you with all the essential knowledge you'll need to implement SAHI alongside YOLOv8. We'll deep-dive into what SAHI is, why sliced inference is critical for large-scale applications, and how to integrate these functionalities with YOLOv8 for enhanced object detection performance. -![SAHI Sliced Inference](https://raw.githubusercontent.com/obss/sahi/main/resources/sliced_inference.gif) +
+<div align="center">
+  <img width="1024" src="…" alt="SAHI Sliced Inference Overview">
+</div>
-## Table of Contents
+## Introduction to SAHI

-1. [Introduction to SAHI](#introduction-to-sahi)
-2. [What is Sliced Inference?](#what-is-sliced-inference)
-3. [Installation and Preparation](#installation-and-preparation)
-4. [Standard Inference with YOLOv8](#standard-inference-with-yolov8)
-5. [Sliced Inference with YOLOv8](#sliced-inference-with-yolov8)
-6. [Handling Prediction Results](#handling-prediction-results)
-7. [Batch Prediction](#batch-prediction)
+SAHI (Slicing Aided Hyper Inference) is an innovative library designed to optimize object detection algorithms for large-scale and high-resolution imagery. Its core functionality lies in partitioning images into manageable slices, running object detection on each slice, and then stitching the results back together. SAHI is compatible with a range of object detection models, including the YOLO series, offering flexibility while making optimized use of computational resources.

-## Introduction to SAHI
+### Key Features of SAHI

-SAHI is a powerful library aimed at performing efficient and accurate object detection over slices of an image, particularly useful for large scale and high-resolution imagery. It integrates seamlessly with YOLO models and allows for a more efficient usage of computational resources.
+- **Seamless Integration**: SAHI integrates effortlessly with YOLO models, so you can start slicing and detecting with minimal code modification.
+- **Resource Efficiency**: By breaking large images down into smaller parts, SAHI optimizes memory usage, allowing you to run high-quality detection on hardware with limited resources.
+- **High Accuracy**: SAHI maintains detection accuracy by employing smart algorithms to merge overlapping detection boxes during the stitching process.

## What is Sliced Inference?

-Sliced Inference is a technique that divides a large image into smaller slices, performs object detection on each slice, and then aggregates the results back onto the original image. This method is especially beneficial when dealing with high-resolution images as it significantly reduces the computational load without sacrificing detection accuracy.
+Sliced Inference refers to the practice of subdividing a large or high-resolution image into smaller segments (slices), conducting object detection on these slices, and then recompiling the results to reconstruct the object locations on the original image. This technique is invaluable in scenarios where computational resources are limited or when working with extremely high-resolution images that could otherwise lead to memory issues.
+
+### Benefits of Sliced Inference
+
+- **Reduced Computational Burden**: Smaller image slices are faster to process and consume less memory, enabling smoother operation on lower-end hardware.
+
+- **Preserved Detection Quality**: Since each slice is treated independently, there is no reduction in detection quality, provided the slices are large enough to capture the objects of interest.
+
+- **Enhanced Scalability**: The technique allows object detection to scale more easily across different image sizes and resolutions, making it ideal for a wide range of applications, from satellite imagery to medical diagnostics. A minimal usage sketch follows the comparison table below.
+
+<table border="0">
+  <tr>
+    <th>YOLOv8 without SAHI</th>
+    <th>YOLOv8 with SAHI</th>
+  </tr>
+  <tr>
+    <td><img src="…" alt="YOLOv8 without SAHI" width="640"></td>
+    <td><img src="…" alt="YOLOv8 with SAHI" width="640"></td>
+  </tr>
+</table>
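+For a quick taste before the detailed sections below, here is a minimal, illustrative sketch of sliced inference with SAHI and YOLOv8. The file names, slice sizes, and confidence threshold are assumptions chosen for demonstration, not prescribed values:
+
+```python
+from sahi import AutoDetectionModel
+from sahi.predict import get_sliced_prediction
+
+# Wrap a YOLOv8 checkpoint in SAHI's detection-model interface
+detection_model = AutoDetectionModel.from_pretrained(
+    model_type='yolov8',
+    model_path='yolov8n.pt',
+    confidence_threshold=0.3,  # assumed threshold, tune for your data
+    device='cpu',  # or 'cuda:0'
+)
+
+# Slice the image into 256x256 tiles with 20% overlap, run detection on
+# each tile, then merge the per-tile boxes back onto the full image
+result = get_sliced_prediction(
+    'demo.jpg',  # hypothetical input image
+    detection_model,
+    slice_height=256,
+    slice_width=256,
+    overlap_height_ratio=0.2,
+    overlap_width_ratio=0.2,
+)
+
+result.export_visuals(export_dir='demo_data/')  # save the annotated image
+```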
## Installation and Preparation @@ -145,3 +162,24 @@ predict( ``` That's it! Now you're equipped to use YOLOv8 with SAHI for both standard and sliced inference. + +## Citations and Acknowledgments + +If you use SAHI in your research or development work, please cite the original SAHI paper and acknowledge the authors: + +!!! note "" + + === "BibTeX" + + ```bibtex + @article{akyon2022sahi, + title={Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection}, + author={Akyon, Fatih Cagatay and Altinuc, Sinan Onur and Temizel, Alptekin}, + journal={2022 IEEE International Conference on Image Processing (ICIP)}, + doi={10.1109/ICIP46576.2022.9897990}, + pages={966-970}, + year={2022} + } + ``` + +We extend our thanks to the SAHI research group for creating and maintaining this invaluable resource for the computer vision community. For more information about SAHI and its creators, visit the [SAHI GitHub repository](https://github.com/obss/sahi). \ No newline at end of file diff --git a/tests/test_cuda.py b/tests/test_cuda.py index df92d821..44dc159c 100644 --- a/tests/test_cuda.py +++ b/tests/test_cuda.py @@ -94,7 +94,8 @@ def test_model_ray_tune(): @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') def test_model_tune(): - YOLO('yolov8n.pt').tune(data='coco8.yaml', imgsz=32, epochs=1, iterations=2, plots=False, device='cpu') + YOLO('yolov8n-pose.pt').tune(data='coco8-pose.yaml', imgsz=32, epochs=1, iterations=2, device='cpu') + YOLO('yolov8n-cls.pt').tune(data='imagenet10', imgsz=32, epochs=1, iterations=2, device='cpu') @pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason='CUDA is not available') diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py index ad146d2a..1c2891dd 100644 --- a/ultralytics/__init__.py +++ b/ultralytics/__init__.py @@ -1,6 +1,6 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -__version__ = '8.0.171' +__version__ = '8.0.172' from ultralytics.models import RTDETR, SAM, YOLO from ultralytics.models.fastsam import FastSAM diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py index 05dfb43d..c6490904 100644 --- a/ultralytics/engine/predictor.py +++ b/ultralytics/engine/predictor.py @@ -143,8 +143,8 @@ class BasePredictor: (list): A list of transformed images. """ same_shapes = all(x.shape == im[0].shape for x in im) - auto = same_shapes and self.model.pt - return [LetterBox(self.imgsz, auto=auto, stride=self.model.stride)(image=x) for x in im] + letterbox = LetterBox(self.imgsz, auto=same_shapes and self.model.pt, stride=self.model.stride) + return [letterbox(image=x) for x in im] def write_results(self, idx, results, batch): """Write inference results to a file or directory.""" diff --git a/ultralytics/models/rtdetr/predict.py b/ultralytics/models/rtdetr/predict.py index d79d370e..33d5d7a2 100644 --- a/ultralytics/models/rtdetr/predict.py +++ b/ultralytics/models/rtdetr/predict.py @@ -58,4 +58,5 @@ class RTDETRPredictor(BasePredictor): Returns: (list): A list of transformed imgs. 
""" - return [LetterBox(self.imgsz, auto=False, scaleFill=True)(image=x) for x in im] + letterbox = LetterBox(self.imgsz, auto=False, scaleFill=True) + return [letterbox(image=x) for x in im] diff --git a/ultralytics/models/sam/predict.py b/ultralytics/models/sam/predict.py index fbab9e59..e8a8197b 100644 --- a/ultralytics/models/sam/predict.py +++ b/ultralytics/models/sam/predict.py @@ -48,11 +48,11 @@ class Predictor(BasePredictor): im = np.ascontiguousarray(im) # contiguous im = torch.from_numpy(im) - img = im.to(self.device) - img = img.half() if self.model.fp16 else img.float() # uint8 to fp16/32 + im = im.to(self.device) + im = im.half() if self.model.fp16 else im.float() # uint8 to fp16/32 if not_tensor: - img = (img - self.mean) / self.std - return img + im = (im - self.mean) / self.std + return im def pre_transform(self, im): """ @@ -64,8 +64,9 @@ class Predictor(BasePredictor): Returns: (list): A list of transformed images. """ - assert len(im) == 1, 'SAM model has not supported batch inference yet!' - return [LetterBox(self.args.imgsz, auto=False, center=False)(image=x) for x in im] + assert len(im) == 1, 'SAM model does not currently support batched inference' + letterbox = LetterBox(self.args.imgsz, auto=False, center=False) + return [letterbox(image=x) for x in im] def inference(self, im, bboxes=None, points=None, labels=None, masks=None, multimask_output=False, *args, **kwargs): """ diff --git a/ultralytics/models/yolo/classify/val.py b/ultralytics/models/yolo/classify/val.py index 456e64cf..0748e27f 100644 --- a/ultralytics/models/yolo/classify/val.py +++ b/ultralytics/models/yolo/classify/val.py @@ -70,6 +70,7 @@ class ClassificationValidator(BaseValidator): on_plot=self.on_plot) self.metrics.speed = self.speed self.metrics.confusion_matrix = self.confusion_matrix + self.metrics.save_dir = self.save_dir def get_stats(self): """Returns a dictionary of metrics obtained by processing targets and predictions.""" diff --git a/ultralytics/utils/callbacks/base.py b/ultralytics/utils/callbacks/base.py index 2e676bf3..5922f88a 100644 --- a/ultralytics/utils/callbacks/base.py +++ b/ultralytics/utils/callbacks/base.py @@ -214,7 +214,7 @@ def add_integration_callbacks(instance): callbacks_list.extend([clear_cb, comet_cb, dvc_cb, mlflow_cb, neptune_cb, tune_cb, tb_cb, wb_cb]) # Load export callbacks (patch to avoid CoreML protobuf error) - if 'Exporter' in instance.__class__.__name__: + if 'Exporter' in instance.__class__.__name__ and instance.args.format in ('coreml', 'mlmodel'): from .tensorboard import callbacks as tb_cb callbacks_list.append(tb_cb)