From 6367ff47484d176759e16e8834fc85fb7ff73dd6 Mon Sep 17 00:00:00 2001
From: Paula Derrenger <107626595+pderrenger@users.noreply.github.com>
Date: Sun, 9 Jun 2024 17:38:05 +0200
Subject: [PATCH] Code Refactor for Speed and Readability (#13450)

Signed-off-by: Glenn Jocher
Co-authored-by: Glenn Jocher
---
 ultralytics/data/split_dota.py            |  9 ++--
 ultralytics/engine/predictor.py           | 12 ++++--
 .../models/sam/modules/tiny_encoder.py    | 42 +++++++++----------
 3 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/ultralytics/data/split_dota.py b/ultralytics/data/split_dota.py
index f0a85d91f..e9cfc686f 100644
--- a/ultralytics/data/split_dota.py
+++ b/ultralytics/data/split_dota.py
@@ -86,7 +86,7 @@ def load_yolo_dota(data_root, split="train"):
     return annos


-def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.01):
+def get_windows(im_size, crop_sizes=(1024,), gaps=(200,), im_rate_thr=0.6, eps=0.01):
     """
     Get the coordinates of windows.

@@ -95,6 +95,7 @@ def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.0
         crop_sizes (List(int)): Crop size of windows.
         gaps (List(int)): Gap between crops.
         im_rate_thr (float): Threshold of windows areas divided by image ares.
+        eps (float): Epsilon value for math operations.
     """
     h, w = im_size
     windows = []
@@ -187,7 +188,7 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
             f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")


-def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024], gaps=[200]):
+def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=(1024,), gaps=(200,)):
     """
     Split both images and labels.

@@ -217,7 +218,7 @@ def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024
         crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))


-def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
+def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
     """
     Split train and val set of DOTA.

@@ -247,7 +248,7 @@ def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
         split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)


-def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
+def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
     """
     Split test set of DOTA, labels are not included within this set.

diff --git a/ultralytics/engine/predictor.py b/ultralytics/engine/predictor.py
index ad261eac8..8597a60a2 100644
--- a/ultralytics/engine/predictor.py
+++ b/ultralytics/engine/predictor.py
@@ -169,12 +169,18 @@ class BasePredictor:

     def predict_cli(self, source=None, model=None):
         """
-        Method used for CLI prediction.
+        Method used for Command Line Interface (CLI) prediction.

-        It uses always generator as outputs as not required by CLI mode.
+        This function is designed to run predictions using the CLI. It sets up the source and model, then processes
+        the inputs in a streaming manner. This method ensures that no outputs accumulate in memory by consuming the
+        generator without storing results.
+
+        Note:
+            Do not modify this function or remove the generator. The generator ensures that no outputs are
+            accumulated in memory, which is critical for preventing memory issues during long-running predictions.
""" gen = self.stream_inference(source, model) - for _ in gen: # noqa, running CLI inference without accumulating any outputs (do not modify) + for _ in gen: # sourcery skip: remove-empty-nested-block, noqa pass def setup_source(self, source): diff --git a/ultralytics/models/sam/modules/tiny_encoder.py b/ultralytics/models/sam/modules/tiny_encoder.py index 28b83f130..c56282e16 100644 --- a/ultralytics/models/sam/modules/tiny_encoder.py +++ b/ultralytics/models/sam/modules/tiny_encoder.py @@ -383,44 +383,44 @@ class TinyViTBlock(nn.Module): """Applies attention-based transformation or padding to input 'x' before passing it through a local convolution. """ - H, W = self.input_resolution - B, L, C = x.shape - assert L == H * W, "input feature has wrong size" + h, w = self.input_resolution + b, l, c = x.shape + assert l == h * w, "input feature has wrong size" res_x = x - if H == self.window_size and W == self.window_size: + if h == self.window_size and w == self.window_size: x = self.attn(x) else: - x = x.view(B, H, W, C) - pad_b = (self.window_size - H % self.window_size) % self.window_size - pad_r = (self.window_size - W % self.window_size) % self.window_size + x = x.view(b, h, w, c) + pad_b = (self.window_size - h % self.window_size) % self.window_size + pad_r = (self.window_size - w % self.window_size) % self.window_size padding = pad_b > 0 or pad_r > 0 if padding: x = F.pad(x, (0, 0, 0, pad_r, 0, pad_b)) - pH, pW = H + pad_b, W + pad_r + pH, pW = h + pad_b, w + pad_r nH = pH // self.window_size nW = pW // self.window_size # Window partition x = ( - x.view(B, nH, self.window_size, nW, self.window_size, C) + x.view(b, nH, self.window_size, nW, self.window_size, c) .transpose(2, 3) - .reshape(B * nH * nW, self.window_size * self.window_size, C) + .reshape(b * nH * nW, self.window_size * self.window_size, c) ) x = self.attn(x) # Window reverse - x = x.view(B, nH, nW, self.window_size, self.window_size, C).transpose(2, 3).reshape(B, pH, pW, C) + x = x.view(b, nH, nW, self.window_size, self.window_size, c).transpose(2, 3).reshape(b, pH, pW, c) if padding: - x = x[:, :H, :W].contiguous() + x = x[:, :h, :w].contiguous() - x = x.view(B, L, C) + x = x.view(b, l, c) x = res_x + self.drop_path(x) - x = x.transpose(1, 2).reshape(B, C, H, W) + x = x.transpose(1, 2).reshape(b, c, h, w) x = self.local_conv(x) - x = x.view(B, C, L).transpose(1, 2) + x = x.view(b, c, l).transpose(1, 2) return x + self.drop_path(self.mlp(x)) @@ -565,10 +565,10 @@ class TinyViT(nn.Module): img_size=224, in_chans=3, num_classes=1000, - embed_dims=[96, 192, 384, 768], - depths=[2, 2, 6, 2], - num_heads=[3, 6, 12, 24], - window_sizes=[7, 7, 14, 7], + embed_dims=(96, 192, 384, 768), + depths=(2, 2, 6, 2), + num_heads=(3, 6, 12, 24), + window_sizes=(7, 7, 14, 7), mlp_ratio=4.0, drop_rate=0.0, drop_path_rate=0.1, @@ -732,8 +732,8 @@ class TinyViT(nn.Module): for i in range(start_i, len(self.layers)): layer = self.layers[i] x = layer(x) - B, _, C = x.shape - x = x.view(B, 64, 64, C) + batch, _, channel = x.shape + x = x.view(batch, 64, 64, channel) x = x.permute(0, 3, 1, 2) return self.neck(x)