added new v11 head

new-v11-head
Francesco Mattioli 2 months ago
parent fceaa40038
commit c902490fa6
  1. 2
      ultralytics/cfg/models/11/yolo11.yaml
  2. 3
      ultralytics/nn/modules/__init__.py
  3. 41
      ultralytics/nn/modules/head.py
  4. 3
      ultralytics/nn/tasks.py

@ -44,4 +44,4 @@ head:
- [[-1, 10], 1, Concat, [1]] # cat head P5
- [-1, 2, C3k2, [1024, True]] # 22 (P5/32-large)
- [[16, 19, 22], 1, Detect, [nc]] # Detect(P3, P4, P5)
- [[16, 19, 22], 1, v11Detect, [nc]] # Detect(P3, P4, P5)

@ -72,7 +72,7 @@ from .conv import (
RepConv,
SpatialAttention,
)
from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment, WorldDetect, v10Detect
from .head import OBB, Classify, Detect, Pose, RTDETRDecoder, Segment, WorldDetect, v10Detect, v11Detect
from .transformer import (
AIFI,
MLP,
@ -141,6 +141,7 @@ __all__ = (
"OBB",
"WorldDetect",
"v10Detect",
"v11Detect",
"ImagePoolingAttn",
"ContrastiveHead",
"BNContrastiveHead",

@ -41,14 +41,7 @@ class Detect(nn.Module):
self.cv2 = nn.ModuleList(
nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch
)
self.cv3 = nn.ModuleList(
nn.Sequential(
nn.Sequential(DWConv(x, x, 3), Conv(x, c3, 1)),
nn.Sequential(DWConv(c3, c3, 3), Conv(c3, c3, 1)),
nn.Conv2d(c3, self.nc, 1),
)
for x in ch
)
self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
if self.end2end:
@ -595,3 +588,35 @@ class v10Detect(Detect):
for x in ch
)
self.one2one_cv3 = copy.deepcopy(self.cv3)
class v11Detect(Detect):
    """
    Detect head variant with a lightweight, depthwise-separable classification branch.

    The constructor defers to ``Detect.__init__`` and then replaces ``cv3`` with a stack built from
    ``DWConv`` + pointwise ``Conv`` pairs. No other attribute or method is overridden here, so everything
    else is inherited from ``Detect`` unchanged.

    Args:
        nc (int): Number of classes.
        ch (tuple): Input channel sizes; one classification branch is built per entry. Must be non-empty
            because ``ch[0]`` is read to size the hidden channels.

    Attributes:
        cv3 (nn.ModuleList): One ``nn.Sequential`` per entry of ``ch``, made of two
            (``DWConv`` 3x3 -> ``Conv`` 1x1) stages followed by a 1x1 ``nn.Conv2d`` projecting to ``nc`` outputs.

    Methods:
        __init__(self, nc=80, ch=()): Builds the base head and swaps in the lightweight ``cv3``.
    """

    def __init__(self, nc=80, ch=()):
        """Initialize the v11Detect head with the given number of classes and input channels."""
        super().__init__(nc, ch)
        c3 = max(ch[0], min(self.nc, 100))  # hidden channels of the classification branch
        # Light cls head: depthwise 3x3 followed by pointwise 1x1, twice, then a 1x1 projection to nc.
        self.cv3 = nn.ModuleList(
            nn.Sequential(
                nn.Sequential(DWConv(x, x, 3), Conv(x, c3, 1)),
                nn.Sequential(DWConv(c3, c3, 3), Conv(c3, c3, 1)),
                nn.Conv2d(c3, self.nc, 1),
            )
            for x in ch
        )
        if self.end2end:
            # cv3 was replaced after the base constructor ran, so re-derive the one-to-one copy from the
            # new branch (same pattern v10Detect uses) instead of leaving one built from the old cv3.
            # NOTE(review): assumes Detect creates one2one_cv3 under end2end - confirm against Detect.__init__.
            self.one2one_cv3 = copy.deepcopy(self.cv3)

@ -60,6 +60,7 @@ from ultralytics.nn.modules import (
Segment,
WorldDetect,
v10Detect,
v11Detect,
)
from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, colorstr, emojis, yaml_load
from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
@ -1041,7 +1042,7 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[x] for x in f)
elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect}:
elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect, v11Detect}:
args.append([ch[x] for x in f])
if m is Segment:
args[2] = make_divisible(min(args[2], max_channels) * width, 8)

Loading…
Cancel
Save