From 49dc00d61a85abc7de984804c2e0a275c60ba7e4 Mon Sep 17 00:00:00 2001
From: fcakyon <akyon@ee.bilkent.edu.tr>
Date: Tue, 30 Jul 2024 22:45:49 +0300
Subject: [PATCH] move crop_and_pad to ultralytics.utils

---
 ultralytics/solutions/action_recognition.py | 33 +----------------
 ultralytics/utils/__init__.py               | 39 +++++++++++++++++++--
 2 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/ultralytics/solutions/action_recognition.py b/ultralytics/solutions/action_recognition.py
index 4ae366f478..a30a3f805f 100644
--- a/ultralytics/solutions/action_recognition.py
+++ b/ultralytics/solutions/action_recognition.py
@@ -11,6 +11,7 @@ from ultralytics.engine.results import Results
 from ultralytics.utils.checks import check_imshow, check_requirements
 from ultralytics.utils.plotting import Annotator
 from ultralytics.utils.torch_utils import select_device
+from ultralytics.utils import crop_and_pad
 
 
 class ActionRecognition:
@@ -457,38 +458,6 @@ class HuggingFaceVideoClassifier:
         return pred_labels, pred_confs
 
 
-def crop_and_pad(frame: np.ndarray, box: List[int], margin_percent: int = 10) -> np.ndarray:
-    """
-    Crop box with margin and take square crop from frame.
-
-    Args:
-        frame (ndarray): The input frame.
-        box (list): The bounding box coordinates.
-        margin_percent (int, optional): The percentage [0-100] of margin to add around the detected object. Defaults to 10.
-
-    Returns:
-        ndarray: The cropped and resized frame.
-    """
-    x1, y1, x2, y2 = map(int, box)
-    w, h = x2 - x1, y2 - y1
-
-    # Add margin
-    margin_x, margin_y = int(w * margin_percent / 100), int(h * margin_percent / 100)
-    x1, y1 = max(0, x1 - margin_x), max(0, y1 - margin_y)
-    x2, y2 = min(frame.shape[1], x2 + margin_x), min(frame.shape[0], y2 + margin_y)
-
-    # Take square crop from frame
-    size = max(y2 - y1, x2 - x1)
-    center_y, center_x = (y1 + y2) // 2, (x1 + x2) // 2
-    half_size = size // 2
-    square_crop = frame[
-        max(0, center_y - half_size) : min(frame.shape[0], center_y + half_size),
-        max(0, center_x - half_size) : min(frame.shape[1], center_x + half_size),
-    ]
-
-    return cv2.resize(square_crop, (224, 224), interpolation=cv2.INTER_LINEAR)
-
-
 if __name__ == "__main__":
     from ultralytics import YOLO
     # from ultralytics.solutions.action_recognition import ActionRecognition
diff --git a/ultralytics/utils/__init__.py b/ultralytics/utils/__init__.py
index 54cb175a16..cf119579cc 100644
--- a/ultralytics/utils/__init__.py
+++ b/ultralytics/utils/__init__.py
@@ -15,7 +15,7 @@ import urllib
 import uuid
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Union
+from typing import List, Union
 
 import cv2
 import matplotlib.pyplot as plt
@@ -26,6 +26,9 @@ from tqdm import tqdm as tqdm_original
 
 from ultralytics import __version__
 
+# Import monkey patches (applied to torch and cv2 later in this module)
+from ultralytics.utils.patches import imread, imshow, imwrite, torch_load, torch_save
+
 # PyTorch Multi-GPU DDP Constants
 RANK = int(os.getenv("RANK", -1))
 LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1))  # https://pytorch.org/docs/stable/elastic/run.html
@@ -1073,10 +1076,40 @@ TESTS_RUNNING = is_pytest_running() or is_github_action_running()
 set_sentry()
 
 # Apply monkey patches
-from ultralytics.utils.patches import imread, imshow, imwrite, torch_load, torch_save
-
 torch.load = torch_load
 torch.save = torch_save
 if WINDOWS:
     # Apply cv2 patches for non-ASCII and non-UTF characters in image paths
     cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow
+
+
+def crop_and_pad(frame: np.ndarray, box: List[int], margin_percent: int = 10) -> np.ndarray:
+    """
+    Crop a box plus margin from a frame, clipped to the frame, and resize it to 224x224 (no padding is added).
+
+    Args:
+        frame (ndarray): The input frame; shape[0] is used as its height and shape[1] as its width.
+        box (list): The bounding box coordinates in (x1, y1, x2, y2) order; each value is cast to int.
+        margin_percent (int, optional): The percentage [0-100] of margin to add around the detected object. Defaults to 10.
+
+    Returns:
+        ndarray: The crop resized to 224x224; it is stretched if clipping at the frame edge made it non-square.
+    """
+    x1, y1, x2, y2 = map(int, box)
+    w, h = x2 - x1, y2 - y1
+
+    # Add margin on every side, clamped to the frame bounds
+    margin_x, margin_y = int(w * margin_percent / 100), int(h * margin_percent / 100)
+    x1, y1 = max(0, x1 - margin_x), max(0, y1 - margin_y)
+    x2, y2 = min(frame.shape[1], x2 + margin_x), min(frame.shape[0], y2 + margin_y)
+
+    # Centered crop of side 2 * half_size; clamped to the frame, so it can be non-square at the edges
+    size = max(y2 - y1, x2 - x1)
+    center_y, center_x = (y1 + y2) // 2, (x1 + x2) // 2
+    half_size = size // 2
+    square_crop = frame[
+        max(0, center_y - half_size) : min(frame.shape[0], center_y + half_size),
+        max(0, center_x - half_size) : min(frame.shape[1], center_x + half_size),
+    ]
+
+    return cv2.resize(square_crop, (224, 224), interpolation=cv2.INTER_LINEAR)