@@ -383,44 +383,44 @@ class TinyViTBlock(nn.Module):
         """Applies attention-based transformation or padding to input 'x' before passing it through a local
         convolution.
         """
-        H, W = self.input_resolution
-        B, L, C = x.shape
-        assert L == H * W, "input feature has wrong size"
+        h, w = self.input_resolution
+        b, l, c = x.shape
+        assert l == h * w, "input feature has wrong size"
         res_x = x
-        if H == self.window_size and W == self.window_size:
+        if h == self.window_size and w == self.window_size:
             x = self.attn(x)
         else:
-            x = x.view(B, H, W, C)
-            pad_b = (self.window_size - H % self.window_size) % self.window_size
-            pad_r = (self.window_size - W % self.window_size) % self.window_size
+            x = x.view(b, h, w, c)
+            pad_b = (self.window_size - h % self.window_size) % self.window_size
+            pad_r = (self.window_size - w % self.window_size) % self.window_size
             padding = pad_b > 0 or pad_r > 0

             if padding:
                 x = F.pad(x, (0, 0, 0, pad_r, 0, pad_b))

-            pH, pW = H + pad_b, W + pad_r
+            pH, pW = h + pad_b, w + pad_r
             nH = pH // self.window_size
             nW = pW // self.window_size
             # Window partition
             x = (
-                x.view(B, nH, self.window_size, nW, self.window_size, C)
+                x.view(b, nH, self.window_size, nW, self.window_size, c)
                 .transpose(2, 3)
-                .reshape(B * nH * nW, self.window_size * self.window_size, C)
+                .reshape(b * nH * nW, self.window_size * self.window_size, c)
             )
             x = self.attn(x)
             # Window reverse
-            x = x.view(B, nH, nW, self.window_size, self.window_size, C).transpose(2, 3).reshape(B, pH, pW, C)
+            x = x.view(b, nH, nW, self.window_size, self.window_size, c).transpose(2, 3).reshape(b, pH, pW, c)

             if padding:
-                x = x[:, :H, :W].contiguous()
+                x = x[:, :h, :w].contiguous()

-            x = x.view(B, L, C)
+            x = x.view(b, l, c)

         x = res_x + self.drop_path(x)

-        x = x.transpose(1, 2).reshape(B, C, H, W)
+        x = x.transpose(1, 2).reshape(b, c, h, w)
         x = self.local_conv(x)
-        x = x.view(B, C, L).transpose(1, 2)
+        x = x.view(b, c, l).transpose(1, 2)

         return x + self.drop_path(self.mlp(x))
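
Note (not part of the patch): the window partition and window reverse in the hunk above are exact inverses on the padded feature map, which is why the block can simply slice the padding back off afterwards. A minimal sketch of that round trip, with illustrative shapes and assuming PyTorch is installed:

```python
import torch

b, c, window_size = 2, 8, 7
ph, pw = 14, 21                                 # padded height/width, multiples of window_size
nh, nw = ph // window_size, pw // window_size   # number of windows per axis

x = torch.randn(b, ph, pw, c)

# Window partition: (b, ph, pw, c) -> (b * nh * nw, window_size * window_size, c)
windows = (
    x.view(b, nh, window_size, nw, window_size, c)
    .transpose(2, 3)
    .reshape(b * nh * nw, window_size * window_size, c)
)

# Window reverse: back to (b, ph, pw, c)
restored = (
    windows.view(b, nh, nw, window_size, window_size, c)
    .transpose(2, 3)
    .reshape(b, ph, pw, c)
)

assert torch.equal(x, restored)  # the round trip is lossless
```

`reshape` rather than `view` is needed after the `transpose`, because the transposed tensor is no longer contiguous.
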
@@ -565,10 +565,10 @@ class TinyViT(nn.Module):
         img_size=224,
         in_chans=3,
         num_classes=1000,
-        embed_dims=[96, 192, 384, 768],
-        depths=[2, 2, 6, 2],
-        num_heads=[3, 6, 12, 24],
-        window_sizes=[7, 7, 14, 7],
+        embed_dims=(96, 192, 384, 768),
+        depths=(2, 2, 6, 2),
+        num_heads=(3, 6, 12, 24),
+        window_sizes=(7, 7, 14, 7),
         mlp_ratio=4.0,
         drop_rate=0.0,
         drop_path_rate=0.1,
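
Note (not part of the patch): switching the constructor defaults from lists to tuples sidesteps Python's shared-mutable-default pitfall, since a default value is evaluated once and reused by every call. A minimal sketch with hypothetical function names:

```python
def bad(depths=[2, 2, 6, 2]):     # default list is built once and shared
    depths.append(0)              # in-place change leaks into later calls
    return depths

def good(depths=(2, 2, 6, 2)):    # tuple default cannot be mutated
    return list(depths)           # copy if a mutable sequence is needed

print(bad())   # [2, 2, 6, 2, 0]
print(bad())   # [2, 2, 6, 2, 0, 0]
print(good())  # [2, 2, 6, 2] every time
```
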
@@ -732,8 +732,8 @@ class TinyViT(nn.Module):
         for i in range(start_i, len(self.layers)):
             layer = self.layers[i]
             x = layer(x)
-        B, _, C = x.shape
-        x = x.view(B, 64, 64, C)
+        batch, _, channel = x.shape
+        x = x.view(batch, 64, 64, channel)
         x = x.permute(0, 3, 1, 2)
         return self.neck(x)
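
Note (not part of the patch): the renamed `batch`/`channel` variables above feed a hard-coded 64 x 64 reshape, i.e. the final token sequence is assumed to hold 64 * 64 = 4096 entries before being turned back into a channels-first feature map for the neck. A minimal shape sketch with illustrative sizes, assuming PyTorch:

```python
import torch

batch, channel = 2, 320                    # illustrative sizes
x = torch.randn(batch, 64 * 64, channel)   # flattened tokens: (batch, L, channel)

x = x.view(batch, 64, 64, channel)         # restore the 64 x 64 spatial grid
x = x.permute(0, 3, 1, 2)                  # channels-first layout for the conv neck

assert x.shape == (batch, channel, 64, 64)
```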