You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
124 lines
4.5 KiB
124 lines
4.5 KiB
import paddle |
|
import paddle.nn as nn |
|
import paddle.nn.functional as F |
|
from paddle import ParamAttr |
|
from paddle.regularizer import L2Decay |
|
from paddlers.models.ppdet.core.workspace import register |
|
|
|
|
|
def _de_sigmoid(x, eps=1e-7):
    """Invert a sigmoid: compute ``-log(1 / x - 1)`` with clipping.

    Both the input and the intermediate ``1 / x - 1`` are clipped to
    ``[eps, 1 / eps]`` so that neither the division nor the logarithm
    receives zero.

    Args:
        x: tensor accepted by ``paddle.clip`` (presumably sigmoid outputs
            in (0, 1) -- confirm against callers).
        eps (float): lower clipping bound; its reciprocal is the upper bound.

    Returns:
        Tensor holding ``-log(clip(1 / clip(x) - 1))``.
    """
    upper = 1. / eps
    bounded = paddle.clip(x, eps, upper)
    # 1 / p - 1 == (1 - p) / p, i.e. exp(-logit) for a sigmoid output p.
    inv_odds = paddle.clip(1. / bounded - 1., eps, upper)
    return -paddle.log(inv_odds)
|
|
|
|
|
@register
class YOLOv3Head(nn.Layer):
    """YOLOv3 detection head: one 1x1 convolution per input feature level.

    In training mode ``forward`` returns the result of the injected ``loss``
    callable. Otherwise it returns the list of head outputs, with the
    objectness channel fused with the IoU prediction when ``iou_aware`` is
    enabled.
    """

    __shared__ = ['num_classes', 'data_format']
    __inject__ = ['loss']

    # NOTE: the list defaults below are shared between instances. They are
    # only read (never mutated) by this class; callers should treat
    # ``self.in_channels`` / ``self.anchors`` as read-only as well.
    def __init__(self,
                 in_channels=[1024, 512, 256],
                 anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                          [59, 119], [116, 90], [156, 198], [373, 326]],
                 anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
                 num_classes=80,
                 loss='YOLOv3Loss',
                 iou_aware=False,
                 iou_aware_factor=0.4,
                 data_format='NCHW'):
        """
        Head for YOLOv3 network

        Args:
            in_channels (list): input channels of each feature level, one
                entry per anchor mask group
            num_classes (int): number of foreground classes
            anchors (list): anchors
            anchor_masks (list): anchor masks, each a list of indices into
                ``anchors`` selecting the anchors of one feature level
            loss (object): YOLOv3Loss instance
            iou_aware (bool): whether to use iou_aware
            iou_aware_factor (float): iou aware factor
            data_format (str): data format, NCHW or NHWC
        """
        super(YOLOv3Head, self).__init__()
        assert len(in_channels) > 0, "in_channels length should > 0"
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.loss = loss

        self.iou_aware = iou_aware
        self.iou_aware_factor = iou_aware_factor

        self.parse_anchor(anchors, anchor_masks)
        self.num_outputs = len(self.anchors)
        self.data_format = data_format

        self.yolo_outputs = []
        for i, level_anchors in enumerate(self.anchors):
            # Per anchor: 4 box terms + 1 objectness + class scores, plus one
            # extra IoU channel when iou_aware is enabled (see forward()).
            per_anchor = self.num_classes + (6 if self.iou_aware else 5)
            num_filters = len(level_anchors) * per_anchor
            name = 'yolo_output.{}'.format(i)
            conv = nn.Conv2D(
                in_channels=self.in_channels[i],
                out_channels=num_filters,
                kernel_size=1,
                stride=1,
                padding=0,
                data_format=data_format,
                bias_attr=ParamAttr(regularizer=L2Decay(0.)))
            conv.skip_quant = True
            yolo_output = self.add_sublayer(name, conv)
            self.yolo_outputs.append(yolo_output)

    def parse_anchor(self, anchors, anchor_masks):
        """Group ``anchors`` according to ``anchor_masks``.

        Sets two attributes:
            self.anchors: for each mask group, the list of selected anchor
                entries (``anchors[i]`` for every index ``i`` in the group).
            self.mask_anchors: for each mask group, the same anchors
                flattened into a single list of values.

        Raises:
            AssertionError: if a mask index is not smaller than
                ``len(anchors)``.
        """
        anchor_num = len(anchors)
        # Validate before any indexing so an out-of-range mask is reported
        # with the intended message rather than a bare IndexError.
        for masks in anchor_masks:
            for mask in masks:
                assert mask < anchor_num, "anchor mask index overflow"

        self.anchors = [[anchors[i] for i in mask] for mask in anchor_masks]
        self.mask_anchors = []
        for masks in anchor_masks:
            flat = []
            for mask in masks:
                flat.extend(anchors[mask])
            self.mask_anchors.append(flat)

    def forward(self, feats, targets=None):
        """Run the head on the per-level feature maps.

        Args:
            feats (list): feature maps, one per anchor mask group.
            targets: passed through to ``self.loss`` in training mode;
                unused otherwise.

        Returns:
            In training mode, the value returned by
            ``self.loss(yolo_outputs, targets, self.anchors)``. Otherwise a
            list with one output tensor per feature level.
        """
        assert len(feats) == len(self.anchors), \
            "expected {} feature maps, got {}".format(
                len(self.anchors), len(feats))
        yolo_outputs = []
        for i, feat in enumerate(feats):
            yolo_output = self.yolo_outputs[i](feat)
            if self.data_format == 'NHWC':
                # Bring the output to channel-first layout for the code below.
                yolo_output = paddle.transpose(yolo_output, [0, 3, 1, 2])
            yolo_outputs.append(yolo_output)

        if self.training:
            return self.loss(yolo_outputs, targets, self.anchors)
        else:
            if self.iou_aware:
                y = []
                for i, out in enumerate(yolo_outputs):
                    na = len(self.anchors[i])
                    # The first `na` channels are the IoU predictions; the
                    # remainder is the regular per-anchor output.
                    ioup, x = out[:, 0:na, :, :], out[:, na:, :, :]
                    b, c, h, w = x.shape
                    no = c // na
                    x = x.reshape((b, na, no, h * w))
                    ioup = ioup.reshape((b, na, 1, h * w))
                    obj = x[:, :, 4:5, :]
                    ioup = F.sigmoid(ioup)
                    obj = F.sigmoid(obj)
                    # Weighted geometric mean of objectness and IoU scores.
                    obj_t = (obj**(1 - self.iou_aware_factor)) * (
                        ioup**self.iou_aware_factor)
                    # Map the fused score back to logit space so the output
                    # has the same form as the non-iou_aware branch.
                    obj_t = _de_sigmoid(obj_t)
                    loc_t = x[:, :, :4, :]
                    cls_t = x[:, :, 5:, :]
                    y_t = paddle.concat([loc_t, obj_t, cls_t], axis=2)
                    y_t = y_t.reshape((b, c, h, w))
                    y.append(y_t)
                return y
            else:
                return yolo_outputs

    @classmethod
    def from_config(cls, cfg, input_shape):
        """Derive ``in_channels`` from the ``channels`` attribute of each
        entry in ``input_shape``; ``cfg`` is not used."""
        return {'in_channels': [i.channels for i in input_shape], }
|
|
|