Cleanup TorchVision related functions (#18790)

pull/18762/head^2
Mohammed Yasin 1 month ago committed by GitHub
parent 066c5443f5
commit 5306a8cc1f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 9
      ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml
  2. 4
      ultralytics/nn/modules/block.py
  3. 2
      ultralytics/nn/modules/conv.py
  4. 6
      ultralytics/nn/tasks.py

@@ -6,18 +6,11 @@
# Parameters
nc: 10 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolo11n-cls.yaml' will call yolo11-cls.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024]
s: [0.33, 0.50, 1024]
m: [0.67, 0.75, 1024]
l: [1.00, 1.00, 1024]
x: [1.00, 1.25, 1024]
# ResNet18 backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, TorchVision, [512, "resnet18", "DEFAULT", True, 2]] # truncate two layers from the end
- [-1, 1, TorchVision, [512, resnet18, DEFAULT, True, 2]] # truncate two layers from the end
# YOLO11n head
head:

@@ -1120,8 +1120,6 @@ class TorchVision(nn.Module):
m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped.
Args:
c1 (int): Input channels.
c2 (int): Output channels.
model (str): Name of the torchvision model to load.
weights (str, optional): Pre-trained weights to load. Default is "DEFAULT".
unwrap (bool, optional): If True, unwraps the model to a sequential containing all but the last `truncate` layers. Default is True.
@@ -1129,7 +1127,7 @@ class TorchVision(nn.Module):
split (bool, optional): Returns output from intermediate child modules as list. Default is False.
"""
def __init__(self, c1, c2, model, weights="DEFAULT", unwrap=True, truncate=2, split=False):
def __init__(self, model, weights="DEFAULT", unwrap=True, truncate=2, split=False):
"""Load the model and weights from torchvision."""
import torchvision # scope for faster 'import ultralytics'

@@ -336,7 +336,7 @@ class Concat(nn.Module):
class Index(nn.Module):
"""Returns a particular index of the input."""
def __init__(self, c1, c2, index=0):
def __init__(self, index=0):
"""Returns a particular index of the input."""
super().__init__()
self.index = index

@@ -1060,12 +1060,16 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
m.legacy = legacy
elif m is RTDETRDecoder: # special case, channels arg must be passed in index 1
args.insert(1, [ch[x] for x in f])
elif m in frozenset({CBLinear, TorchVision, Index}):
elif m is CBLinear:
c2 = args[0]
c1 = ch[f]
args = [c1, c2, *args[1:]]
elif m is CBFuse:
c2 = ch[f[-1]]
elif m in frozenset({TorchVision, Index}):
c2 = args[0]
c1 = ch[f]
args = [*args[1:]]
else:
c2 = ch[f]

Loading…
Cancel
Save