Code Refactor for Speed and Readability (#13450)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Authored by Paula Derrenger, committed by GitHub
parent 1b26838def
commit 6367ff4748
Files changed (3):
1. ultralytics/data/split_dota.py (9 changed lines)
2. ultralytics/engine/predictor.py (12 changed lines)
3. ultralytics/models/sam/modules/tiny_encoder.py (42 changed lines)

@@ -86,7 +86,7 @@ def load_yolo_dota(data_root, split="train"):
     return annos


-def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.01):
+def get_windows(im_size, crop_sizes=(1024,), gaps=(200,), im_rate_thr=0.6, eps=0.01):
     """
     Get the coordinates of windows.
@@ -95,6 +95,7 @@ def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.0
         crop_sizes (List(int)): Crop size of windows.
         gaps (List(int)): Gap between crops.
         im_rate_thr (float): Threshold of windows areas divided by image areas.
+        eps (float): Epsilon value for math operations.
     """
     h, w = im_size
     windows = []
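For orientation on the function being changed here: get_windows slides fixed-size crops across the image, with gaps controlling how much adjacent crops overlap, and im_rate_thr filtering out windows whose overlap with the image is too small a fraction of the window area. A minimal sketch of the start-coordinate math along one dimension; the step = crop_size - gap stride and the window_starts helper are illustrative assumptions, not the repo's exact code:

```python
import math


def window_starts(dim, crop_size=1024, gap=200):
    """Illustrative helper: start offsets of overlapping crops along one image dimension."""
    step = crop_size - gap  # assumed stride: adjacent windows overlap by `gap` pixels
    n = 1 if dim <= crop_size else math.ceil((dim - crop_size) / step) + 1
    # Clamp so the last window ends at the image edge instead of running past it
    return [max(0, min(i * step, dim - crop_size)) for i in range(n)]


print(window_starts(3000))  # [0, 824, 1648, 1976] -- last start clamped to the edge
```

With the new tuple defaults, crop_sizes=(1024,) and gaps=(200,) describe a single scale with an 824-pixel stride under this assumed formula.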
@@ -187,7 +188,7 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
                 f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")


-def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024], gaps=[200]):
+def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=(1024,), gaps=(200,)):
     """
     Split both images and labels.
@@ -217,7 +218,7 @@ def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024
         crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))


-def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
+def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
     """
     Split train and val set of DOTA.
@@ -247,7 +248,7 @@ def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
         split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)


-def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
+def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
     """
     Split test set of DOTA, labels are not included within this set.
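The list-to-tuple edits in this file all address the same Python pitfall: default argument values are evaluated once, when the function is defined, so a mutable default like [1024] is one list object shared by every call. The split_dota defaults are only read, so this is defensive hardening rather than a bug fix, but immutable tuples rule out the failure mode entirely. A minimal demonstration with hypothetical functions (not repo code):

```python
def bad(item, bucket=[]):  # one shared list for every call to bad()
    bucket.append(item)
    return bucket


def good(item, bucket=()):  # tuples are immutable, so sharing is harmless
    return (*bucket, item)


print(bad(1), bad(2))    # [1, 2] [1, 2] -- state leaks between calls
print(good(1), good(2))  # (1,) (2,)     -- each call starts fresh
```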

@@ -169,12 +169,18 @@ class BasePredictor:
     def predict_cli(self, source=None, model=None):
         """
-        Method used for CLI prediction.
+        Method used for Command Line Interface (CLI) prediction.
 
-        It uses always generator as outputs as not required by CLI mode.
+        This function is designed to run predictions using the CLI. It sets up the source and model, then processes
+        the inputs in a streaming manner. This method ensures that no outputs accumulate in memory by consuming the
+        generator without storing results.
+
+        Note:
+            Do not modify this function or remove the generator. The generator ensures that no outputs are
+            accumulated in memory, which is critical for preventing memory issues during long-running predictions.
         """
         gen = self.stream_inference(source, model)
-        for _ in gen:  # noqa, running CLI inference without accumulating any outputs (do not modify)
+        for _ in gen:  # sourcery skip: remove-empty-nested-block, noqa
             pass
 
     def setup_source(self, source):
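The new Note makes the streaming contract explicit: stream_inference is a generator, and the empty for loop drains it one result at a time, so nothing accumulates in memory. A small standalone sketch of the difference; stream_results is a stand-in, not the Ultralytics API:

```python
def stream_results(n):
    """Stand-in for stream_inference(): lazily yields one result at a time."""
    for i in range(n):
        yield {"frame": i, "payload": bytes(10_000)}  # pretend each result is ~10 KB

# Streaming consumption (what predict_cli does): constant memory,
# each result becomes garbage as soon as the next one arrives.
for _ in stream_results(10_000):
    pass

# Accumulating consumption (what the Note warns against): ~100 MB held at once.
# results = list(stream_results(10_000))
```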

@@ -383,44 +383,44 @@ class TinyViTBlock(nn.Module):
         """Applies attention-based transformation or padding to input 'x' before passing it through a local
         convolution.
         """
-        H, W = self.input_resolution
-        B, L, C = x.shape
-        assert L == H * W, "input feature has wrong size"
+        h, w = self.input_resolution
+        b, l, c = x.shape
+        assert l == h * w, "input feature has wrong size"
         res_x = x
-        if H == self.window_size and W == self.window_size:
+        if h == self.window_size and w == self.window_size:
             x = self.attn(x)
         else:
-            x = x.view(B, H, W, C)
-            pad_b = (self.window_size - H % self.window_size) % self.window_size
-            pad_r = (self.window_size - W % self.window_size) % self.window_size
+            x = x.view(b, h, w, c)
+            pad_b = (self.window_size - h % self.window_size) % self.window_size
+            pad_r = (self.window_size - w % self.window_size) % self.window_size
             padding = pad_b > 0 or pad_r > 0
             if padding:
                 x = F.pad(x, (0, 0, 0, pad_r, 0, pad_b))
 
-            pH, pW = H + pad_b, W + pad_r
+            pH, pW = h + pad_b, w + pad_r
             nH = pH // self.window_size
             nW = pW // self.window_size
             # Window partition
             x = (
-                x.view(B, nH, self.window_size, nW, self.window_size, C)
+                x.view(b, nH, self.window_size, nW, self.window_size, c)
                 .transpose(2, 3)
-                .reshape(B * nH * nW, self.window_size * self.window_size, C)
+                .reshape(b * nH * nW, self.window_size * self.window_size, c)
             )
             x = self.attn(x)
             # Window reverse
-            x = x.view(B, nH, nW, self.window_size, self.window_size, C).transpose(2, 3).reshape(B, pH, pW, C)
+            x = x.view(b, nH, nW, self.window_size, self.window_size, c).transpose(2, 3).reshape(b, pH, pW, c)
             if padding:
-                x = x[:, :H, :W].contiguous()
+                x = x[:, :h, :w].contiguous()
 
-            x = x.view(B, L, C)
+            x = x.view(b, l, c)
         x = res_x + self.drop_path(x)
 
-        x = x.transpose(1, 2).reshape(B, C, H, W)
+        x = x.transpose(1, 2).reshape(b, c, h, w)
         x = self.local_conv(x)
-        x = x.view(B, C, L).transpose(1, 2)
+        x = x.view(b, c, l).transpose(1, 2)
         return x + self.drop_path(self.mlp(x))
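The edits above are pure renames (B, L, C, H, W become b, l, c, h, w) with no behavioral change, but the code they run through is the core of the block: the view/transpose/reshape chain tiles the padded (b, pH, pW, c) feature map into (b*nH*nW, window_size*window_size, c) windows for attention, and the reverse chain inverts it exactly. A standalone PyTorch shape check mirroring those lines:

```python
import torch

b, c, window_size = 2, 8, 7
pH = pW = 14  # padded height/width, multiples of window_size
nH, nW = pH // window_size, pW // window_size

x = torch.randn(b, pH, pW, c)

# Window partition: (b, pH, pW, c) -> (b*nH*nW, window_size*window_size, c)
win = (
    x.view(b, nH, window_size, nW, window_size, c)
    .transpose(2, 3)
    .reshape(b * nH * nW, window_size * window_size, c)
)
assert win.shape == (b * nH * nW, window_size * window_size, c)

# Window reverse: the exact inverse of the partition
back = win.view(b, nH, nW, window_size, window_size, c).transpose(2, 3).reshape(b, pH, pW, c)
assert torch.equal(x, back)  # round trip is lossless
```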
@@ -565,10 +565,10 @@ class TinyViT(nn.Module):
         img_size=224,
         in_chans=3,
         num_classes=1000,
-        embed_dims=[96, 192, 384, 768],
-        depths=[2, 2, 6, 2],
-        num_heads=[3, 6, 12, 24],
-        window_sizes=[7, 7, 14, 7],
+        embed_dims=(96, 192, 384, 768),
+        depths=(2, 2, 6, 2),
+        num_heads=(3, 6, 12, 24),
+        window_sizes=(7, 7, 14, 7),
         mlp_ratio=4.0,
         drop_rate=0.0,
         drop_path_rate=0.1,
@@ -732,8 +732,8 @@ class TinyViT(nn.Module):
         for i in range(start_i, len(self.layers)):
             layer = self.layers[i]
             x = layer(x)
-        B, _, C = x.shape
-        x = x.view(B, 64, 64, C)
+        batch, _, channel = x.shape
+        x = x.view(batch, 64, 64, channel)
         x = x.permute(0, 3, 1, 2)
         return self.neck(x)
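The same renaming applies in forward_features, whose tail is the hand-off from transformer to convolutional neck: the flattened token sequence is reshaped back to a spatial grid, then permuted from channels-last to channels-first. A minimal shape trace (the channel count here is illustrative):

```python
import torch

batch, channel = 2, 256
x = torch.randn(batch, 64 * 64, channel)  # (batch, tokens, channels) from the last layer

x = x.view(batch, 64, 64, channel)  # restore the 64x64 spatial grid (BHWC)
x = x.permute(0, 3, 1, 2)           # BHWC -> BCHW, the layout conv layers expect
assert x.shape == (batch, channel, 64, 64)
```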
