[Refactor]: Support batch inference traceable by ONNX in SSD, YOLOv3, FSAF, RetinaNet, and FCOS (#4699)

* Support batch infer in RetinaNet

* Support batch multiclass_nms

* Revert multiclass_nms

* Fix API deprecation warning

* Do not repeat anchors

* Move img_shapes

* Update Yolov3

* Support FCOS

* Support RPN

* Fix RPN topk_inds error

* Make batch exportable to ONNX for yolo_head

* Make fcos_head exportable to ONNX with batch dim

* Support ATSS

* Support CornerNet and CentripetalNet

* Update RetinaNet and delta_xywh

* Remove ugly code

* Remove ugly code of FCOS

* Remove ugly code of ATSS/YOLOV3

* Support RPN and revert bbox_head

* Expand anchors to batch and remove BG class when using deploy_nms_pre

* Update

* Use dim=-1 instead of dim=2

* Rename anchor_head method

* Keep the original output format when NMS is not used

* Rename method and unify code style

* Fix paa_head and unittest

* Fix FSAF ONNX export error

* Fix error

* Fix single-stage img_shapes for ONNX

* Move conf_thr

* Fix rpn_head for ONNX

* Add distance2bbox unittest

* Remove TODO

* Fix RPN

* Update docstrings

Co-authored-by: maningsheng <maningsheng@sensetime.com>

16 changed files (changed line counts in parentheses):

1. configs/_base_/models/ssd300.py (1)
2. mmdet/core/bbox/coder/delta_xywh_bbox_coder.py (2)
3. mmdet/core/bbox/coder/tblr_bbox_coder.py (7)
4. mmdet/core/bbox/transforms.py (42)
5. mmdet/models/dense_heads/anchor_head.py (183)
6. mmdet/models/dense_heads/atss_head.py (190)
7. mmdet/models/dense_heads/cascade_rpn_head.py (132)
8. mmdet/models/dense_heads/dense_test_mixins.py (5)
9. mmdet/models/dense_heads/fcos_head.py (181)
10. mmdet/models/dense_heads/gfl_head.py (97)
11. mmdet/models/dense_heads/paa_head.py (102)
12. mmdet/models/dense_heads/rpn_head.py (129)
13. mmdet/models/dense_heads/yolo_head.py (206)
14. tests/test_models/test_dense_heads/test_paa_head.py (10)
15. tests/test_models/test_forward.py (2)
16. tests/test_utils/test_misc.py (45)

@@ -42,6 +42,7 @@ model = dict(
neg_pos_ratio=3,
debug=False),
test_cfg=dict(
nms_pre=1000,
nms=dict(type='nms', iou_threshold=0.45),
min_bbox_size=0,
score_thr=0.02,

@@ -179,7 +179,7 @@ def delta2bbox(rois,
>>> [ 1., 1., 1., 1.],
>>> [ 0., 0., 2., -1.],
>>> [ 0.7, -1.9, -0.5, 0.3]])
>>> delta2bbox(rois, deltas, max_shape=(32, 32))
>>> delta2bbox(rois, deltas, max_shape=(32, 32, 3))
tensor([[0.0000, 0.0000, 1.0000, 1.0000],
[0.1409, 0.1409, 2.8591, 2.8591],
[0.0000, 0.3161, 4.1945, 0.6839],

@@ -168,9 +168,10 @@ def tblr2bboxes(priors,
if normalize_by_wh:
wh = priors[..., 2:4] - priors[..., 0:2]
w, h = torch.split(wh, 1, dim=-1)
loc_decode[..., :2] *= h # tb
loc_decode[..., 2:] *= w # lr
# Inplace operation with slice would fail when exporting to ONNX
th = h * loc_decode[..., :2] # tb
tw = w * loc_decode[..., 2:] # lr
loc_decode = torch.cat([th, tw], dim=-1)
# Cannot be exported to ONNX when using loc_decode.split(1, dim=-1)
top, bottom, left, right = loc_decode.split((1, 1, 1, 1), dim=-1)
xmin = prior_centers[..., 0].unsqueeze(-1) - left
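
The workaround above generalizes: any in-place slice assignment on a traced tensor is a hazard for ONNX export. A minimal sketch of the pattern, with illustrative names and shapes only:

import torch

def scale_tblr(loc_decode, w, h):
    # in-place forms such as `loc_decode[..., :2] *= h` can bake
    # constants into the exported graph; build new tensors and
    # concatenate instead
    th = h * loc_decode[..., :2]  # top/bottom offsets scaled by height
    tw = w * loc_decode[..., 2:]  # left/right offsets scaled by width
    return torch.cat([th, tw], dim=-1)

loc = torch.rand(2, 8, 4)                        # (B, N, 4) tblr offsets
w, h = torch.rand(2, 8, 1), torch.rand(2, 8, 1)  # prior widths/heights
print(scale_tblr(loc, w, h).shape)               # torch.Size([2, 8, 4])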

@@ -120,24 +120,40 @@ def distance2bbox(points, distance, max_shape=None):
"""Decode distance prediction to bounding box.
Args:
points (Tensor): Shape (n, 2), [x, y].
points (Tensor): Shape (B, N, 2) or (N, 2).
distance (Tensor): Distance from the given point to 4
boundaries (left, top, right, bottom).
max_shape (tuple): Shape of the image.
boundaries (left, top, right, bottom). Shape (B, N, 4) or (N, 4)
max_shape (Sequence[int] or torch.Tensor or Sequence[
Sequence[int]], optional): Maximum bounds for boxes, specifies
(H, W, C) or (H, W). If priors shape is (B, N, 4), then
the max_shape should be a Sequence[Sequence[int]]
and the length of max_shape should also be B.
Returns:
Tensor: Decoded bboxes.
Tensor: Boxes with shape (N, 4) or (B, N, 4)
"""
x1 = points[:, 0] - distance[:, 0]
y1 = points[:, 1] - distance[:, 1]
x2 = points[:, 0] + distance[:, 2]
y2 = points[:, 1] + distance[:, 3]
x1 = points[..., 0] - distance[..., 0]
y1 = points[..., 1] - distance[..., 1]
x2 = points[..., 0] + distance[..., 2]
y2 = points[..., 1] + distance[..., 3]
bboxes = torch.stack([x1, y1, x2, y2], -1)
if max_shape is not None:
x1 = x1.clamp(min=0, max=max_shape[1])
y1 = y1.clamp(min=0, max=max_shape[0])
x2 = x2.clamp(min=0, max=max_shape[1])
y2 = y2.clamp(min=0, max=max_shape[0])
return torch.stack([x1, y1, x2, y2], -1)
if not isinstance(max_shape, torch.Tensor):
max_shape = x1.new_tensor(max_shape)
max_shape = max_shape[..., :2].type_as(x1)
if max_shape.ndim == 2:
assert bboxes.ndim == 3
assert max_shape.size(0) == bboxes.size(0)
min_xy = x1.new_tensor(0)
max_xy = torch.cat([max_shape, max_shape],
dim=-1).flip(-1).unsqueeze(-2)
bboxes = torch.where(bboxes < min_xy, min_xy, bboxes)
bboxes = torch.where(bboxes > max_xy, max_xy, bboxes)
return bboxes
def bbox2distance(points, bbox, max_dis=None, eps=0.1):
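
A usage sketch for the batched clamping path added above; shapes are invented, and the import path is the module this hunk touches:

import torch
from mmdet.core.bbox.transforms import distance2bbox

points = torch.rand(2, 100, 2) * 32   # (B, N, 2)
distance = torch.rand(2, 100, 4) * 8  # (B, N, 4): left, top, right, bottom
max_shape = [(32, 32), (24, 28)]      # one (H, W) bound per image, B total
bboxes = distance2bbox(points, distance, max_shape=max_shape)
print(bboxes.shape)                   # torch.Size([2, 100, 4])

Because the bounds are turned into a tensor, flipped into (W, H, W, H) order and broadcast, each image in the batch is clamped against its own shape without a Python loop.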

@@ -519,11 +519,11 @@ class AnchorHead(BaseDenseHead, BBoxTestMixin):
Returns:
list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
The first item is an (n, 5) tensor, where the first 4 columns
are bounding box positions (tl_x, tl_y, br_x, br_y) and the
5-th column is a score between 0 and 1. The second item is a
(n,) tensor where each item is the predicted class label of the
corresponding box.
The first item is an (n, 5) tensor, where the 5 columns represent
(tl_x, tl_y, br_x, br_y, score) and the score is between 0 and 1.
The shape of the second tensor in the tuple is (n,), and
each element represents the class label of the corresponding
box.
Example:
>>> import mmcv
@@ -559,57 +559,57 @@ class AnchorHead(BaseDenseHead, BBoxTestMixin):
mlvl_anchors = self.anchor_generator.grid_anchors(
featmap_sizes, device=device)
result_list = []
for img_id in range(len(img_metas)):
cls_score_list = [
cls_scores[i][img_id].detach() for i in range(num_levels)
]
bbox_pred_list = [
bbox_preds[i][img_id].detach() for i in range(num_levels)
cls_score_list = [cls_scores[i].detach() for i in range(num_levels)]
bbox_pred_list = [bbox_preds[i].detach() for i in range(num_levels)]
if torch.onnx.is_in_onnx_export():
assert len(
img_metas
) == 1, 'Only support one input image while exporting to ONNX'
img_shapes = img_metas[0]['img_shape_for_onnx']
else:
img_shapes = [
img_metas[i]['img_shape']
for i in range(cls_scores[0].shape[0])
]
# get origin input shape to support onnx dynamic shape
if torch.onnx.is_in_onnx_export():
img_shape = img_metas[img_id]['img_shape_for_onnx']
else:
img_shape = img_metas[img_id]['img_shape']
scale_factor = img_metas[img_id]['scale_factor']
if with_nms:
# some heads don't support with_nms argument
proposals = self._get_bboxes_single(cls_score_list,
bbox_pred_list,
mlvl_anchors, img_shape,
scale_factor, cfg, rescale)
else:
proposals = self._get_bboxes_single(cls_score_list,
bbox_pred_list,
mlvl_anchors, img_shape,
scale_factor, cfg, rescale,
with_nms)
result_list.append(proposals)
scale_factors = [
img_metas[i]['scale_factor'] for i in range(cls_scores[0].shape[0])
]
if with_nms:
# some heads don't support with_nms argument
result_list = self._get_bboxes(cls_score_list, bbox_pred_list,
mlvl_anchors, img_shapes,
scale_factors, cfg, rescale)
else:
result_list = self._get_bboxes(cls_score_list, bbox_pred_list,
mlvl_anchors, img_shapes,
scale_factors, cfg, rescale,
with_nms)
return result_list
def _get_bboxes_single(self,
cls_score_list,
bbox_pred_list,
mlvl_anchors,
img_shape,
scale_factor,
cfg,
rescale=False,
with_nms=True):
"""Transform outputs for a single batch item into bbox predictions.
def _get_bboxes(self,
cls_score_list,
bbox_pred_list,
mlvl_anchors,
img_shapes,
scale_factors,
cfg,
rescale=False,
with_nms=True):
"""Transform outputs for a batch item into bbox predictions.
Args:
cls_score_list (list[Tensor]): Box scores for a single scale level
Has shape (num_anchors * num_classes, H, W).
Has shape (N, num_anchors * num_classes, H, W).
bbox_pred_list (list[Tensor]): Box energies / deltas for a single
scale level with shape (num_anchors * 4, H, W).
scale level with shape (N, num_anchors * 4, H, W).
mlvl_anchors (list[Tensor]): Box reference for a single scale level
with shape (num_total_anchors, 4).
img_shape (tuple[int]): Shape of the input image,
(height, width, 3).
scale_factor (ndarray): Scale factor of the image arange as
(w_scale, h_scale, w_scale, h_scale).
img_shapes (list[tuple[int]]): Shape of the batch input image,
list[(height, width, 3)].
scale_factors (list[ndarray]): Scale factor of the batch
image arranged as list[(w_scale, h_scale, w_scale, h_scale)].
cfg (mmcv.Config): Test / postprocessing configuration,
if None, test_cfg would be used.
rescale (bool): If True, return boxes in original image space.
@@ -618,78 +618,113 @@ class AnchorHead(BaseDenseHead, BBoxTestMixin):
Default: True.
Returns:
Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
are bounding box positions (tl_x, tl_y, br_x, br_y) and the
5-th column is a score between 0 and 1.
list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
The first item is an (n, 5) tensor, where the 5 columns represent
(tl_x, tl_y, br_x, br_y, score) and the score is between 0 and 1.
The shape of the second tensor in the tuple is (n,), and
each element represents the class label of the corresponding
box.
"""
cfg = self.test_cfg if cfg is None else cfg
assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
batch_size = cls_score_list[0].shape[0]
# convert to tensor to keep tracing
nms_pre_tensor = torch.tensor(
cfg.get('nms_pre', -1),
device=cls_score_list[0].device,
dtype=torch.long)
mlvl_bboxes = []
mlvl_scores = []
for cls_score, bbox_pred, anchors in zip(cls_score_list,
bbox_pred_list, mlvl_anchors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
cls_score = cls_score.permute(1, 2,
0).reshape(-1, self.cls_out_channels)
cls_score = cls_score.permute(0, 2, 3,
1).reshape(batch_size, -1,
self.cls_out_channels)
if self.use_sigmoid_cls:
scores = cls_score.sigmoid()
else:
scores = cls_score.softmax(-1)
bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
bbox_pred = bbox_pred.permute(0, 2, 3,
1).reshape(batch_size, -1, 4)
anchors = anchors.expand_as(bbox_pred)
# Always keep topk op for dynamic input in onnx
if nms_pre_tensor > 0 and (torch.onnx.is_in_onnx_export()
or scores.shape[-2] > nms_pre_tensor):
from torch import _shape_as_tensor
# keep shape as tensor and get k
num_anchor = _shape_as_tensor(scores)[-2].to(nms_pre_tensor)
num_anchor = _shape_as_tensor(scores)[-2].to(
nms_pre_tensor.device)
nms_pre = torch.where(nms_pre_tensor < num_anchor,
nms_pre_tensor, num_anchor)
# Get maximum scores for foreground classes.
if self.use_sigmoid_cls:
max_scores, _ = scores.max(dim=1)
max_scores, _ = scores.max(-1)
else:
# remind that we set FG labels to [0, num_class-1]
# since mmdet v2.0
# BG cat_id: num_class
max_scores, _ = scores[:, :-1].max(dim=1)
max_scores, _ = scores[..., :-1].max(-1)
_, topk_inds = max_scores.topk(nms_pre)
anchors = anchors[topk_inds, :]
bbox_pred = bbox_pred[topk_inds, :]
scores = scores[topk_inds, :]
batch_inds = torch.arange(batch_size).view(
-1, 1).expand_as(topk_inds)
anchors = anchors[batch_inds, topk_inds, :]
bbox_pred = bbox_pred[batch_inds, topk_inds, :]
scores = scores[batch_inds, topk_inds, :]
bboxes = self.bbox_coder.decode(
anchors, bbox_pred, max_shape=img_shape)
anchors, bbox_pred, max_shape=img_shapes)
mlvl_bboxes.append(bboxes)
mlvl_scores.append(scores)
mlvl_bboxes = torch.cat(mlvl_bboxes)
batch_mlvl_bboxes = torch.cat(mlvl_bboxes, dim=1)
if rescale:
mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
mlvl_scores = torch.cat(mlvl_scores)
batch_mlvl_bboxes /= batch_mlvl_bboxes.new_tensor(
scale_factors).unsqueeze(1)
batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
# Set max number of boxes to be fed into nms in deployment
deploy_nms_pre = cfg.get('deploy_nms_pre', -1)
if deploy_nms_pre > 0 and torch.onnx.is_in_onnx_export():
max_scores, _ = mlvl_scores.max(dim=1)
_, topk_inds = max_scores.topk(deploy_nms_pre)
mlvl_scores = mlvl_scores[topk_inds, :]
mlvl_bboxes = mlvl_bboxes[topk_inds, :]
# Get maximum scores for foreground classes.
if self.use_sigmoid_cls:
batch_mlvl_scores, _ = batch_mlvl_scores.max(-1)
else:
# remind that we set FG labels to [0, num_class-1]
# since mmdet v2.0
# BG cat_id: num_class
batch_mlvl_scores, _ = batch_mlvl_scores[..., :-1].max(-1)
_, topk_inds = batch_mlvl_scores.topk(deploy_nms_pre)
batch_inds = torch.arange(batch_size).view(-1,
1).expand_as(topk_inds)
batch_mlvl_scores = batch_mlvl_scores[batch_inds, topk_inds]
batch_mlvl_bboxes = batch_mlvl_bboxes[batch_inds, topk_inds]
if self.use_sigmoid_cls:
# Add a dummy background class to the backend when using sigmoid
# remind that we set FG labels to [0, num_class-1] since mmdet v2.0
# BG cat_id: num_class
padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
padding = batch_mlvl_scores.new_zeros(batch_size,
batch_mlvl_scores.shape[1],
1)
batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)
if with_nms:
det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
cfg.score_thr, cfg.nms,
cfg.max_per_img)
return det_bboxes, det_labels
det_results = []
for (mlvl_bboxes, mlvl_scores) in zip(batch_mlvl_bboxes,
batch_mlvl_scores):
det_bbox, det_label = multiclass_nms(mlvl_bboxes, mlvl_scores,
cfg.score_thr, cfg.nms,
cfg.max_per_img)
det_results.append(tuple([det_bbox, det_label]))
else:
return mlvl_bboxes, mlvl_scores
det_results = [
tuple(mlvl_bs)
for mlvl_bs in zip(batch_mlvl_bboxes, batch_mlvl_scores)
]
return det_results
def aug_test(self, feats, img_metas, rescale=False):
"""Test function with test time augmentation.

@@ -342,11 +342,11 @@ class ATSSHead(AnchorHead):
Returns:
list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
The first item is an (n, 5) tensor, where the first 4 columns
are bounding box positions (tl_x, tl_y, br_x, br_y) and the
5-th column is a score between 0 and 1. The second item is a
(n,) tensor where each item is the predicted class label of the
corresponding box.
The first item is an (n, 5) tensor, where the 5 columns represent
(tl_x, tl_y, br_x, br_y, score) and the score is between 0 and 1.
The shape of the second tensor in the tuple is (n,), and
each element represents the class label of the corresponding
box.
"""
cfg = self.test_cfg if cfg is None else cfg
assert len(cls_scores) == len(bbox_preds)
@@ -356,51 +356,47 @@ class ATSSHead(AnchorHead):
mlvl_anchors = self.anchor_generator.grid_anchors(
featmap_sizes, device=device)
result_list = []
for img_id in range(len(img_metas)):
cls_score_list = [
cls_scores[i][img_id].detach() for i in range(num_levels)
]
bbox_pred_list = [
bbox_preds[i][img_id].detach() for i in range(num_levels)
]
centerness_pred_list = [
centernesses[i][img_id].detach() for i in range(num_levels)
]
img_shape = img_metas[img_id]['img_shape']
scale_factor = img_metas[img_id]['scale_factor']
proposals = self._get_bboxes_single(cls_score_list, bbox_pred_list,
centerness_pred_list,
mlvl_anchors, img_shape,
scale_factor, cfg, rescale,
with_nms)
result_list.append(proposals)
cls_score_list = [cls_scores[i].detach() for i in range(num_levels)]
bbox_pred_list = [bbox_preds[i].detach() for i in range(num_levels)]
centerness_pred_list = [
centernesses[i].detach() for i in range(num_levels)
]
img_shapes = [
img_metas[i]['img_shape'] for i in range(cls_scores[0].shape[0])
]
scale_factors = [
img_metas[i]['scale_factor'] for i in range(cls_scores[0].shape[0])
]
result_list = self._get_bboxes(cls_score_list, bbox_pred_list,
centerness_pred_list, mlvl_anchors,
img_shapes, scale_factors, cfg, rescale,
with_nms)
return result_list
def _get_bboxes_single(self,
cls_scores,
bbox_preds,
centernesses,
mlvl_anchors,
img_shape,
scale_factor,
cfg,
rescale=False,
with_nms=True):
def _get_bboxes(self,
cls_scores,
bbox_preds,
centernesses,
mlvl_anchors,
img_shapes,
scale_factors,
cfg,
rescale=False,
with_nms=True):
"""Transform outputs for a single batch item into labeled boxes.
Args:
cls_scores (list[Tensor]): Box scores for a single scale level
with shape (num_anchors * num_classes, H, W).
with shape (N, num_anchors * num_classes, H, W).
bbox_preds (list[Tensor]): Box energies / deltas for a single
scale level with shape (num_anchors * 4, H, W).
scale level with shape (N, num_anchors * 4, H, W).
centernesses (list[Tensor]): Centerness for a single scale level
with shape (num_anchors * 1, H, W).
with shape (N, num_anchors * 1, H, W).
mlvl_anchors (list[Tensor]): Box reference for a single scale level
with shape (num_total_anchors, 4).
img_shape (tuple[int]): Shape of the input image,
(height, width, 3).
scale_factor (ndarray): Scale factor of the image arrange as
img_shapes (list[tuple[int]]): Shape of the input image,
list[(height, width, 3)].
scale_factors (list[ndarray]): Scale factor of the image arranged as
(w_scale, h_scale, w_scale, h_scale).
cfg (mmcv.Config | None): Test / postprocessing configuration,
if None, test_cfg would be used.
@@ -410,64 +406,106 @@ class ATSSHead(AnchorHead):
Default: True.
Returns:
tuple(Tensor):
det_bboxes (Tensor): BBox predictions in shape (n, 5), where
the first 4 columns are bounding box positions
(tl_x, tl_y, br_x, br_y) and the 5-th column is a score
between 0 and 1.
det_labels (Tensor): A (n,) tensor where each item is the
predicted class label of the corresponding box.
list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
The first item is an (n, 5) tensor, where the 5 columns represent
(tl_x, tl_y, br_x, br_y, score) and the score is between 0 and 1.
The shape of the second tensor in the tuple is (n,), and
each element represents the class label of the corresponding
box.
"""
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
device = cls_scores[0].device
batch_size = cls_scores[0].shape[0]
# convert to tensor to keep tracing
nms_pre_tensor = torch.tensor(
cfg.get('nms_pre', -1), device=device, dtype=torch.long)
mlvl_bboxes = []
mlvl_scores = []
mlvl_centerness = []
for cls_score, bbox_pred, centerness, anchors in zip(
cls_scores, bbox_preds, centernesses, mlvl_anchors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
scores = cls_score.permute(1, 2, 0).reshape(
-1, self.cls_out_channels).sigmoid()
bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()
nms_pre = cfg.get('nms_pre', -1)
if nms_pre > 0 and scores.shape[0] > nms_pre:
max_scores, _ = (scores * centerness[:, None]).max(dim=1)
scores = cls_score.permute(0, 2, 3, 1).reshape(
batch_size, -1, self.cls_out_channels).sigmoid()
centerness = centerness.permute(0, 2, 3,
1).reshape(batch_size,
-1).sigmoid()
bbox_pred = bbox_pred.permute(0, 2, 3,
1).reshape(batch_size, -1, 4)
# Always keep topk op for dynamic input in onnx
if nms_pre_tensor > 0 and (torch.onnx.is_in_onnx_export()
or scores.shape[-2] > nms_pre_tensor):
from torch import _shape_as_tensor
# keep shape as tensor and get k
num_anchor = _shape_as_tensor(scores)[-2].to(device)
nms_pre = torch.where(nms_pre_tensor < num_anchor,
nms_pre_tensor, num_anchor)
max_scores, _ = (scores * centerness[..., None]).max(-1)
_, topk_inds = max_scores.topk(nms_pre)
anchors = anchors[topk_inds, :]
bbox_pred = bbox_pred[topk_inds, :]
scores = scores[topk_inds, :]
centerness = centerness[topk_inds]
batch_inds = torch.arange(batch_size).view(
-1, 1).expand_as(topk_inds).long()
bbox_pred = bbox_pred[batch_inds, topk_inds, :]
scores = scores[batch_inds, topk_inds, :]
centerness = centerness[batch_inds, topk_inds]
else:
anchors = anchors.expand_as(bbox_pred)
bboxes = self.bbox_coder.decode(
anchors, bbox_pred, max_shape=img_shape)
anchors, bbox_pred, max_shape=img_shapes)
mlvl_bboxes.append(bboxes)
mlvl_scores.append(scores)
mlvl_centerness.append(centerness)
mlvl_bboxes = torch.cat(mlvl_bboxes)
batch_mlvl_bboxes = torch.cat(mlvl_bboxes, dim=1)
if rescale:
mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
mlvl_scores = torch.cat(mlvl_scores)
# Add a dummy background class to the backend when using sigmoid
batch_mlvl_bboxes /= batch_mlvl_bboxes.new_tensor(
scale_factors).unsqueeze(1)
batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
batch_mlvl_centerness = torch.cat(mlvl_centerness, dim=1)
# Set max number of boxes to be fed into nms in deployment
deploy_nms_pre = cfg.get('deploy_nms_pre', -1)
if deploy_nms_pre > 0 and torch.onnx.is_in_onnx_export():
batch_mlvl_scores, _ = (
batch_mlvl_scores *
batch_mlvl_centerness.unsqueeze(2).expand_as(batch_mlvl_scores)
).max(-1)
_, topk_inds = batch_mlvl_scores.topk(deploy_nms_pre)
batch_inds = torch.arange(batch_size).view(-1,
1).expand_as(topk_inds)
batch_mlvl_scores = batch_mlvl_scores[batch_inds, topk_inds, :]
batch_mlvl_bboxes = batch_mlvl_bboxes[batch_inds, topk_inds, :]
batch_mlvl_centerness = batch_mlvl_centerness[batch_inds,
topk_inds]
# remind that we set FG labels to [0, num_class-1] since mmdet v2.0
# BG cat_id: num_class
padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
mlvl_centerness = torch.cat(mlvl_centerness)
padding = batch_mlvl_scores.new_zeros(batch_size,
batch_mlvl_scores.shape[1], 1)
batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)
if with_nms:
det_bboxes, det_labels = multiclass_nms(
mlvl_bboxes,
mlvl_scores,
cfg.score_thr,
cfg.nms,
cfg.max_per_img,
score_factors=mlvl_centerness)
return det_bboxes, det_labels
det_results = []
for (mlvl_bboxes, mlvl_scores,
mlvl_centerness) in zip(batch_mlvl_bboxes, batch_mlvl_scores,
batch_mlvl_centerness):
det_bbox, det_label = multiclass_nms(
mlvl_bboxes,
mlvl_scores,
cfg.score_thr,
cfg.nms,
cfg.max_per_img,
score_factors=mlvl_centerness)
det_results.append(tuple([det_bbox, det_label]))
else:
return mlvl_bboxes, mlvl_scores, mlvl_centerness
det_results = [
tuple(mlvl_bs)
for mlvl_bs in zip(batch_mlvl_bboxes, batch_mlvl_scores,
batch_mlvl_centerness)
]
return det_results
def get_targets(self,
anchor_list,
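
The "always keep topk" branch above clamps k with torch.where on a shape that is read as a tensor, so the exported graph stays valid for any input size. The trick in isolation, using the same private torch helper as the diff:

import torch
from torch import _shape_as_tensor  # private helper, as used in this diff

nms_pre_tensor = torch.tensor(1000, dtype=torch.long)
scores = torch.rand(2, 500, 80)
# read the candidate count as a traced tensor, not a Python int
num_anchor = _shape_as_tensor(scores)[-2].to(nms_pre_tensor.device)
nms_pre = torch.where(nms_pre_tensor < num_anchor, nms_pre_tensor,
                      num_anchor)
max_scores, _ = scores.max(-1)
_, topk_inds = max_scores.topk(nms_pre)  # k = min(nms_pre, num_anchor)
print(topk_inds.shape)  # torch.Size([2, 500]) here, since 500 < 1000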

@@ -1,9 +1,12 @@
from __future__ import division
import copy
import warnings
import torch
import torch.nn as nn
from mmcv import ConfigDict
from mmcv.cnn import normal_init
from mmcv.ops import DeformConv2d
from mmcv.ops import DeformConv2d, batched_nms
from mmdet.core import (RegionAssigner, build_assigner, build_sampler,
images_to_levels, multi_apply)
@@ -536,6 +539,133 @@ class StageCascadeRPNHead(RPNHead):
new_anchor_list.append(mlvl_anchors)
return new_anchor_list
# TODO: temporary plan
def _get_bboxes_single(self,
cls_scores,
bbox_preds,
mlvl_anchors,
img_shape,
scale_factor,
cfg,
rescale=False):
"""Transform outputs for a single batch item into bbox predictions.
Args:
cls_scores (list[Tensor]): Box scores for each scale level
Has shape (num_anchors * num_classes, H, W).
bbox_preds (list[Tensor]): Box energies / deltas for each scale
level with shape (num_anchors * 4, H, W).
mlvl_anchors (list[Tensor]): Box reference for each scale level
with shape (num_total_anchors, 4).
img_shape (tuple[int]): Shape of the input image,
(height, width, 3).
scale_factor (ndarray): Scale factor of the image arranged as
(w_scale, h_scale, w_scale, h_scale).
cfg (mmcv.Config): Test / postprocessing configuration,
if None, test_cfg would be used.
rescale (bool): If True, return boxes in original image space.
Returns:
Tensor: Labeled boxes have the shape of (n,5), where the
first 4 columns are bounding box positions
(tl_x, tl_y, br_x, br_y) and the 5-th column is a score
between 0 and 1.
"""
cfg = self.test_cfg if cfg is None else cfg
cfg = copy.deepcopy(cfg)
# bboxes from different level should be independent during NMS,
# level_ids are used as labels for batched NMS to separate them
level_ids = []
mlvl_scores = []
mlvl_bbox_preds = []
mlvl_valid_anchors = []
for idx in range(len(cls_scores)):
rpn_cls_score = cls_scores[idx]
rpn_bbox_pred = bbox_preds[idx]
assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
if self.use_sigmoid_cls:
rpn_cls_score = rpn_cls_score.reshape(-1)
scores = rpn_cls_score.sigmoid()
else:
rpn_cls_score = rpn_cls_score.reshape(-1, 2)
# We set FG labels to [0, num_class-1] and BG label to
# num_class in RPN head since mmdet v2.5, which is unified to
# be consistent with other head since mmdet v2.0. In mmdet v2.0
# to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
scores = rpn_cls_score.softmax(dim=1)[:, 0]
rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
anchors = mlvl_anchors[idx]
if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
# sort is faster than topk
# _, topk_inds = scores.topk(cfg.nms_pre)
if torch.onnx.is_in_onnx_export():
# sort op will be converted to TopK in onnx
# and k<=3480 in TensorRT
_, topk_inds = scores.topk(cfg.nms_pre)
scores = scores[topk_inds]
else:
ranked_scores, rank_inds = scores.sort(descending=True)
topk_inds = rank_inds[:cfg.nms_pre]
scores = ranked_scores[:cfg.nms_pre]
rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
anchors = anchors[topk_inds, :]
mlvl_scores.append(scores)
mlvl_bbox_preds.append(rpn_bbox_pred)
mlvl_valid_anchors.append(anchors)
level_ids.append(
scores.new_full((scores.size(0), ), idx, dtype=torch.long))
scores = torch.cat(mlvl_scores)
anchors = torch.cat(mlvl_valid_anchors)
rpn_bbox_pred = torch.cat(mlvl_bbox_preds)
proposals = self.bbox_coder.decode(
anchors, rpn_bbox_pred, max_shape=img_shape)
ids = torch.cat(level_ids)
# Skip nonzero op while exporting to ONNX
if cfg.min_bbox_size > 0 and (not torch.onnx.is_in_onnx_export()):
w = proposals[:, 2] - proposals[:, 0]
h = proposals[:, 3] - proposals[:, 1]
valid_inds = torch.nonzero(
(w >= cfg.min_bbox_size)
& (h >= cfg.min_bbox_size),
as_tuple=False).squeeze()
if valid_inds.sum().item() != len(proposals):
proposals = proposals[valid_inds, :]
scores = scores[valid_inds]
ids = ids[valid_inds]
# deprecate arguments warning
if 'nms' not in cfg or 'max_num' in cfg or 'nms_thr' in cfg:
warnings.warn(
'In rpn_proposal or test_cfg, '
'nms_thr has been moved to a dict named nms as '
'iou_threshold, max_num has been renamed as max_per_img, '
'name of original arguments and the way to specify '
'iou_threshold of NMS will be deprecated.')
if 'nms' not in cfg:
cfg.nms = ConfigDict(dict(type='nms', iou_threshold=cfg.nms_thr))
if 'max_num' in cfg:
if 'max_per_img' in cfg:
assert cfg.max_num == cfg.max_per_img, f'You ' \
f'set max_num and ' \
f'max_per_img at the same time, but get {cfg.max_num} ' \
f'and {cfg.max_per_img} respectively' \
'Please delete max_num which will be deprecated.'
else:
cfg.max_per_img = cfg.max_num
if 'nms_thr' in cfg:
assert cfg.nms.iou_threshold == cfg.nms_thr, f'You set' \
f' iou_threshold in nms and ' \
f'nms_thr at the same time, but get' \
f' {cfg.nms.iou_threshold} and {cfg.nms_thr}' \
f' respectively. Please delete the nms_thr ' \
f'which will be deprecated.'
dets, keep = batched_nms(proposals, scores, ids, cfg.nms)
return dets[:cfg.max_per_img]
@HEADS.register_module()
class CascadeRPNHead(BaseDenseHead):
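
The method above passes FPN level indices to batched_nms in place of class labels, so proposals from different levels never suppress each other. A tiny sketch with hand-made boxes:

import torch
from mmcv.ops import batched_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [0., 0., 10., 10.]])
scores = torch.tensor([0.9, 0.8, 0.7])
level_ids = torch.tensor([0, 0, 1])  # third box lives on another level
dets, keep = batched_nms(boxes, scores, level_ids,
                         dict(type='nms', iou_threshold=0.5))
# box 1 overlaps box 0 on the same level and is suppressed;
# box 2 survives because its level id differs
print(keep)  # tensor([0, 2])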

@@ -54,7 +54,10 @@ class BBoxTestMixin(object):
# check with_nms argument
gb_sig = signature(self.get_bboxes)
gb_args = [p.name for p in gb_sig.parameters.values()]
gbs_sig = signature(self._get_bboxes_single)
if hasattr(self, '_get_bboxes'):
gbs_sig = signature(self._get_bboxes)
else:
gbs_sig = signature(self._get_bboxes_single)
gbs_args = [p.name for p in gbs_sig.parameters.values()]
assert ('with_nms' in gb_args) and ('with_nms' in gbs_args), \
f'{self.__class__.__name__}' \
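
What the mixin change above does at runtime, sketched with an invented DummyHead: prefer the batched `_get_bboxes` when a head defines one, otherwise fall back to `_get_bboxes_single`, then verify that the chosen method accepts `with_nms`:

from inspect import signature

class DummyHead:

    def get_bboxes(self, cls_scores, img_metas, with_nms=True):
        pass

    def _get_bboxes(self, cls_scores, img_shapes, with_nms=True):
        pass

head = DummyHead()
fn = head._get_bboxes if hasattr(head, '_get_bboxes') \
    else head._get_bboxes_single
gbs_args = [p.name for p in signature(fn).parameters.values()]
assert 'with_nms' in gbs_args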

@@ -282,11 +282,11 @@ class FCOSHead(AnchorFreeHead):
Returns:
list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
The first item is an (n, 5) tensor, where the first 4 columns
are bounding box positions (tl_x, tl_y, br_x, br_y) and the
5-th column is a score between 0 and 1. The second item is a
(n,) tensor where each item is the predicted class label of the
corresponding box.
The first item is an (n, 5) tensor, where the 5 columns represent
(tl_x, tl_y, br_x, br_y, score) and the score is between 0 and 1.
The shape of the second tensor in the tuple is (n,), and
each element represents the class label of the corresponding
box.
"""
assert len(cls_scores) == len(bbox_preds)
num_levels = len(cls_scores)
@@ -294,49 +294,55 @@ class FCOSHead(AnchorFreeHead):
featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
mlvl_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
bbox_preds[0].device)
result_list = []
for img_id in range(len(img_metas)):
cls_score_list = [
cls_scores[i][img_id].detach() for i in range(num_levels)
]
bbox_pred_list = [
bbox_preds[i][img_id].detach() for i in range(num_levels)
]
centerness_pred_list = [
centernesses[i][img_id].detach() for i in range(num_levels)
cls_score_list = [cls_scores[i].detach() for i in range(num_levels)]
bbox_pred_list = [bbox_preds[i].detach() for i in range(num_levels)]
centerness_pred_list = [
centernesses[i].detach() for i in range(num_levels)
]
if torch.onnx.is_in_onnx_export():
assert len(
img_metas
) == 1, 'Only support one input image while exporting to ONNX'
img_shapes = img_metas[0]['img_shape_for_onnx']
else:
img_shapes = [
img_metas[i]['img_shape']
for i in range(cls_scores[0].shape[0])
]
img_shape = img_metas[img_id]['img_shape']
scale_factor = img_metas[img_id]['scale_factor']
det_bboxes = self._get_bboxes_single(
cls_score_list, bbox_pred_list, centerness_pred_list,
mlvl_points, img_shape, scale_factor, cfg, rescale, with_nms)
result_list.append(det_bboxes)
scale_factors = [
img_metas[i]['scale_factor'] for i in range(cls_scores[0].shape[0])
]
result_list = self._get_bboxes(cls_score_list, bbox_pred_list,
centerness_pred_list, mlvl_points,
img_shapes, scale_factors, cfg, rescale,
with_nms)
return result_list
def _get_bboxes_single(self,
cls_scores,
bbox_preds,
centernesses,
mlvl_points,
img_shape,
scale_factor,
cfg,
rescale=False,
with_nms=True):
def _get_bboxes(self,
cls_scores,
bbox_preds,
centernesses,
mlvl_points,
img_shapes,
scale_factors,
cfg,
rescale=False,
with_nms=True):
"""Transform outputs for a single batch item into bbox predictions.
Args:
cls_scores (list[Tensor]): Box scores for a single scale level
with shape (num_points * num_classes, H, W).
with shape (N, num_points * num_classes, H, W).
bbox_preds (list[Tensor]): Box energies / deltas for a single scale
level with shape (num_points * 4, H, W).
level with shape (N, num_points * 4, H, W).
centernesses (list[Tensor]): Centerness for a single scale level
with shape (num_points * 4, H, W).
with shape (N, num_points * 4, H, W).
mlvl_points (list[Tensor]): Box reference for a single scale level
with shape (num_total_points, 4).
img_shape (tuple[int]): Shape of the input image,
(height, width, 3).
scale_factor (ndarray): Scale factor of the image arrange as
img_shapes (list[tuple[int]]): Shape of the input image,
list[(height, width, 3)].
scale_factors (list[ndarray]): Scale factor of the image arranged as
(w_scale, h_scale, w_scale, h_scale).
cfg (mmcv.Config | None): Test / postprocessing configuration,
if None, test_cfg would be used.
@@ -356,59 +362,96 @@ class FCOSHead(AnchorFreeHead):
"""
cfg = self.test_cfg if cfg is None else cfg
assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
device = cls_scores[0].device
batch_size = cls_scores[0].shape[0]
# convert to tensor to keep tracing
nms_pre_tensor = torch.tensor(
cfg.get('nms_pre', -1), device=device, dtype=torch.long)
mlvl_bboxes = []
mlvl_scores = []
mlvl_centerness = []
for cls_score, bbox_pred, centerness, points in zip(
cls_scores, bbox_preds, centernesses, mlvl_points):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
scores = cls_score.permute(1, 2, 0).reshape(
-1, self.cls_out_channels).sigmoid()
centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()
bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
nms_pre = cfg.get('nms_pre', -1)
if nms_pre > 0 and scores.shape[0] > nms_pre:
max_scores, _ = (scores * centerness[:, None]).max(dim=1)
scores = cls_score.permute(0, 2, 3, 1).reshape(
batch_size, -1, self.cls_out_channels).sigmoid()
centerness = centerness.permute(0, 2, 3,
1).reshape(batch_size,
-1).sigmoid()
bbox_pred = bbox_pred.permute(0, 2, 3,
1).reshape(batch_size, -1, 4)
# Always keep topk op for dynamic input in onnx
if nms_pre_tensor > 0 and (torch.onnx.is_in_onnx_export()
or scores.shape[-2] > nms_pre_tensor):
from torch import _shape_as_tensor
# keep shape as tensor and get k
num_anchor = _shape_as_tensor(scores)[-2].to(device)
nms_pre = torch.where(nms_pre_tensor < num_anchor,
nms_pre_tensor, num_anchor)
max_scores, _ = (scores * centerness[..., None]).max(-1)
_, topk_inds = max_scores.topk(nms_pre)
points = points[topk_inds, :]
bbox_pred = bbox_pred[topk_inds, :]
scores = scores[topk_inds, :]
centerness = centerness[topk_inds]
bboxes = distance2bbox(points, bbox_pred, max_shape=img_shape)
batch_inds = torch.arange(batch_size).view(
-1, 1).expand_as(topk_inds).long()
bbox_pred = bbox_pred[batch_inds, topk_inds, :]
scores = scores[batch_inds, topk_inds, :]
centerness = centerness[batch_inds, topk_inds]
bboxes = distance2bbox(points, bbox_pred, max_shape=img_shapes)
mlvl_bboxes.append(bboxes)
mlvl_scores.append(scores)
mlvl_centerness.append(centerness)
mlvl_bboxes = torch.cat(mlvl_bboxes)
batch_mlvl_bboxes = torch.cat(mlvl_bboxes, dim=1)
if rescale:
mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
mlvl_scores = torch.cat(mlvl_scores)
mlvl_centerness = torch.cat(mlvl_centerness)
batch_mlvl_bboxes /= batch_mlvl_bboxes.new_tensor(
scale_factors).unsqueeze(1)
batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
batch_mlvl_centerness = torch.cat(mlvl_centerness, dim=1)
# Set max number of boxes to be fed into nms in deployment
deploy_nms_pre = cfg.get('deploy_nms_pre', -1)
if deploy_nms_pre > 0 and torch.onnx.is_in_onnx_export():
max_scores, _ = (mlvl_scores * mlvl_centerness[:, None]).max(dim=1)
_, topk_inds = max_scores.topk(deploy_nms_pre)
mlvl_scores = mlvl_scores[topk_inds, :]
mlvl_bboxes = mlvl_bboxes[topk_inds, :]
mlvl_centerness = mlvl_centerness[topk_inds]
padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
batch_mlvl_scores, _ = (
batch_mlvl_scores *
batch_mlvl_centerness.unsqueeze(2).expand_as(batch_mlvl_scores)
).max(-1)
_, topk_inds = batch_mlvl_scores.topk(deploy_nms_pre)
batch_inds = torch.arange(batch_mlvl_scores.shape[0]).view(
-1, 1).expand_as(topk_inds)
batch_mlvl_scores = batch_mlvl_scores[batch_inds, topk_inds, :]
batch_mlvl_bboxes = batch_mlvl_bboxes[batch_inds, topk_inds, :]
batch_mlvl_centerness = batch_mlvl_centerness[batch_inds,
topk_inds]
# remind that we set FG labels to [0, num_class-1] since mmdet v2.0
# BG cat_id: num_class
mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
padding = batch_mlvl_scores.new_zeros(batch_size,
batch_mlvl_scores.shape[1], 1)
batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)
if with_nms:
det_bboxes, det_labels = multiclass_nms(
mlvl_bboxes,
mlvl_scores,
cfg.score_thr,
cfg.nms,
cfg.max_per_img,
score_factors=mlvl_centerness)
return det_bboxes, det_labels
det_results = []
for (mlvl_bboxes, mlvl_scores,
mlvl_centerness) in zip(batch_mlvl_bboxes, batch_mlvl_scores,
batch_mlvl_centerness):
det_bbox, det_label = multiclass_nms(
mlvl_bboxes,
mlvl_scores,
cfg.score_thr,
cfg.nms,
cfg.max_per_img,
score_factors=mlvl_centerness)
det_results.append(tuple([det_bbox, det_label]))
else:
return mlvl_bboxes, mlvl_scores, mlvl_centerness
det_results = [
tuple(mlvl_bs)
for mlvl_bs in zip(batch_mlvl_bboxes, batch_mlvl_scores,
batch_mlvl_centerness)
]
return det_results
def _get_points_single(self,
featmap_size,
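
The deploy_nms_pre branch above trades the dynamic candidate count for a constant top-k at export time, so the NMS stage always sees a fixed-size input. An isolated sketch of that selection with invented shapes; note the ranking scores live in a separate tensor so the per-class scores survive the gather:

import torch

deploy_nms_pre = 200
scores = torch.rand(1, 1000, 80)  # (B, num_priors, num_classes)
centerness = torch.rand(1, 1000)  # (B, num_priors)
bboxes = torch.rand(1, 1000, 4)
# rank by the centerness-weighted class maximum, keep a constant k
ranking, _ = (scores * centerness.unsqueeze(2).expand_as(scores)).max(-1)
_, topk_inds = ranking.topk(deploy_nms_pre)
batch_inds = torch.arange(scores.shape[0]).view(-1, 1).expand_as(topk_inds)
scores = scores[batch_inds, topk_inds, :]
bboxes = bboxes[batch_inds, topk_inds, :]
print(scores.shape, bboxes.shape)  # (1, 200, 80) and (1, 200, 4)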

@@ -202,8 +202,8 @@ class GFLHead(AnchorHead):
Returns:
Tensor: Anchor centers with shape (N, 2), "xy" format.
"""
anchors_cx = (anchors[:, 2] + anchors[:, 0]) / 2
anchors_cy = (anchors[:, 3] + anchors[:, 1]) / 2
anchors_cx = (anchors[..., 2] + anchors[..., 0]) / 2
anchors_cy = (anchors[..., 3] + anchors[..., 1]) / 2
return torch.stack([anchors_cx, anchors_cy], dim=-1)
def loss_single(self, anchors, cls_score, bbox_pred, labels, label_weights,
@@ -368,28 +368,28 @@ class GFLHead(AnchorHead):
return dict(
loss_cls=losses_cls, loss_bbox=losses_bbox, loss_dfl=losses_dfl)
def _get_bboxes_single(self,
cls_scores,
bbox_preds,
mlvl_anchors,
img_shape,
scale_factor,
cfg,
rescale=False,
with_nms=True):
def _get_bboxes(self,
cls_scores,
bbox_preds,
mlvl_anchors,
img_shapes,
scale_factors,
cfg,
rescale=False,
with_nms=True):
"""Transform outputs for a single batch item into labeled boxes.
Args:
cls_scores (list[Tensor]): Box scores for a single scale level
has shape (num_classes, H, W).
has shape (N, num_classes, H, W).
bbox_preds (list[Tensor]): Box distribution logits for a single
scale level with shape (4*(n+1), H, W), n is max value of
scale level with shape (N, 4*(n+1), H, W), n is max value of
integral set.
mlvl_anchors (list[Tensor]): Box reference for a single scale level
with shape (num_total_anchors, 4).
img_shape (tuple[int]): Shape of the input image,
(height, width, 3).
scale_factor (ndarray): Scale factor of the image arange as
img_shapes (list[tuple[int]]): Shape of the input image,
list[(height, width, 3)].
scale_factors (list[ndarray]): Scale factor of the image arranged as
(w_scale, h_scale, w_scale, h_scale).
cfg (mmcv.Config | None): Test / postprocessing configuration,
if None, test_cfg would be used.
@@ -399,16 +399,17 @@ class GFLHead(AnchorHead):
Default: True.
Returns:
tuple(Tensor):
det_bboxes (Tensor): Bbox predictions in shape (N, 5), where
the first 4 columns are bounding box positions
(tl_x, tl_y, br_x, br_y) and the 5-th column is a score
between 0 and 1.
det_labels (Tensor): A (N,) tensor where each item is the
predicted class label of the corresponding box.
list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
The first item is an (n, 5) tensor, where the 5 columns represent
(tl_x, tl_y, br_x, br_y, score) and the score is between 0 and 1.
The shape of the second tensor in the tuple is (n,), and
each element represents the class label of the corresponding
box.
"""
cfg = self.test_cfg if cfg is None else cfg
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
batch_size = cls_scores[0].shape[0]
mlvl_bboxes = []
mlvl_scores = []
for cls_score, bbox_pred, stride, anchors in zip(
@@ -416,43 +417,57 @@ class GFLHead(AnchorHead):
mlvl_anchors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
assert stride[0] == stride[1]
scores = cls_score.permute(0, 2, 3, 1).reshape(
batch_size, -1, self.cls_out_channels).sigmoid()
bbox_pred = bbox_pred.permute(0, 2, 3, 1)
scores = cls_score.permute(1, 2, 0).reshape(
-1, self.cls_out_channels).sigmoid()
bbox_pred = bbox_pred.permute(1, 2, 0)
bbox_pred = self.integral(bbox_pred) * stride[0]
bbox_pred = bbox_pred.reshape(batch_size, -1, 4)
nms_pre = cfg.get('nms_pre', -1)
if nms_pre > 0 and scores.shape[0] > nms_pre:
max_scores, _ = scores.max(dim=1)
if nms_pre > 0 and scores.shape[1] > nms_pre:
max_scores, _ = scores.max(-1)
_, topk_inds = max_scores.topk(nms_pre)
batch_inds = torch.arange(batch_size).view(
-1, 1).expand_as(topk_inds).long()
anchors = anchors[topk_inds, :]
bbox_pred = bbox_pred[topk_inds, :]
scores = scores[topk_inds, :]
bbox_pred = bbox_pred[batch_inds, topk_inds, :]
scores = scores[batch_inds, topk_inds, :]
else:
anchors = anchors.expand_as(bbox_pred)
bboxes = distance2bbox(
self.anchor_center(anchors), bbox_pred, max_shape=img_shape)
self.anchor_center(anchors), bbox_pred, max_shape=img_shapes)
mlvl_bboxes.append(bboxes)
mlvl_scores.append(scores)
mlvl_bboxes = torch.cat(mlvl_bboxes)
batch_mlvl_bboxes = torch.cat(mlvl_bboxes, dim=1)
if rescale:
mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
batch_mlvl_bboxes /= batch_mlvl_bboxes.new_tensor(
scale_factors).unsqueeze(1)
mlvl_scores = torch.cat(mlvl_scores)
batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
# Add a dummy background class to the backend when using sigmoid
# remind that we set FG labels to [0, num_class-1] since mmdet v2.0
# BG cat_id: num_class
padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
padding = batch_mlvl_scores.new_zeros(batch_size,
batch_mlvl_scores.shape[1], 1)
batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)
if with_nms:
det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
cfg.score_thr, cfg.nms,
cfg.max_per_img)
return det_bboxes, det_labels
det_results = []
for (mlvl_bboxes, mlvl_scores) in zip(batch_mlvl_bboxes,
batch_mlvl_scores):
det_bbox, det_label = multiclass_nms(mlvl_bboxes, mlvl_scores,
cfg.score_thr, cfg.nms,
cfg.max_per_img)
det_results.append(tuple([det_bbox, det_label]))
else:
return mlvl_bboxes, mlvl_scores
det_results = [
tuple(mlvl_bs)
for mlvl_bs in zip(batch_mlvl_bboxes, batch_mlvl_scores)
]
return det_results
def get_targets(self,
anchor_list,
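
The small change to anchor_center above is what lets the rest of the GFL path stay shape-agnostic: ellipsis indexing makes the helper accept single-image and batched anchors alike.

import torch

def anchor_center(anchors):
    # valid for (N, 4) and (B, N, 4) alike thanks to `...`
    cx = (anchors[..., 2] + anchors[..., 0]) / 2
    cy = (anchors[..., 3] + anchors[..., 1]) / 2
    return torch.stack([cx, cy], dim=-1)

print(anchor_center(torch.rand(10, 4)).shape)     # torch.Size([10, 2])
print(anchor_center(torch.rand(2, 10, 4)).shape)  # torch.Size([2, 10, 2])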

@@ -516,25 +516,27 @@ class PAAHead(ATSSHead):
label_channels=1,
unmap_outputs=True)
def _get_bboxes_single(self,
cls_scores,
bbox_preds,
iou_preds,
mlvl_anchors,
img_shape,
scale_factor,
cfg,
rescale=False,
with_nms=True):
def _get_bboxes(self,
cls_scores,
bbox_preds,
iou_preds,
mlvl_anchors,
img_shapes,
scale_factors,
cfg,
rescale=False,
with_nms=True):
"""Transform outputs for a single batch item into labeled boxes.
This method is almost same as `ATSSHead._get_bboxes_single()`.
This method is almost same as `ATSSHead._get_bboxes()`.
We use sqrt(iou_preds * cls_scores) in NMS process instead of just
cls_scores. Besides, score voting is used when `` score_voting``
is set to True.
"""
assert with_nms, 'PAA only supports "with_nms=True" now'
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
batch_size = cls_scores[0].shape[0]
mlvl_bboxes = []
mlvl_scores = []
mlvl_iou_preds = []
@@ -542,50 +544,64 @@ class PAAHead(ATSSHead):
cls_scores, bbox_preds, iou_preds, mlvl_anchors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
scores = cls_score.permute(1, 2, 0).reshape(
-1, self.cls_out_channels).sigmoid()
bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
iou_preds = iou_preds.permute(1, 2, 0).reshape(-1).sigmoid()
scores = cls_score.permute(0, 2, 3, 1).reshape(
batch_size, -1, self.cls_out_channels).sigmoid()
bbox_pred = bbox_pred.permute(0, 2, 3,
1).reshape(batch_size, -1, 4)
iou_preds = iou_preds.permute(0, 2, 3, 1).reshape(batch_size,
-1).sigmoid()
nms_pre = cfg.get('nms_pre', -1)
if nms_pre > 0 and scores.shape[0] > nms_pre:
max_scores, _ = (scores * iou_preds[:, None]).sqrt().max(dim=1)
if nms_pre > 0 and scores.shape[1] > nms_pre:
max_scores, _ = (scores * iou_preds[..., None]).sqrt().max(-1)
_, topk_inds = max_scores.topk(nms_pre)
batch_inds = torch.arange(batch_size).view(
-1, 1).expand_as(topk_inds).long()
anchors = anchors[topk_inds, :]
bbox_pred = bbox_pred[topk_inds, :]
scores = scores[topk_inds, :]
iou_preds = iou_preds[topk_inds]
bbox_pred = bbox_pred[batch_inds, topk_inds, :]
scores = scores[batch_inds, topk_inds, :]
iou_preds = iou_preds[batch_inds, topk_inds]
else:
anchors = anchors.expand_as(bbox_pred)
bboxes = self.bbox_coder.decode(
anchors, bbox_pred, max_shape=img_shape)
anchors, bbox_pred, max_shape=img_shapes)
mlvl_bboxes.append(bboxes)
mlvl_scores.append(scores)
mlvl_iou_preds.append(iou_preds)
mlvl_bboxes = torch.cat(mlvl_bboxes)
batch_mlvl_bboxes = torch.cat(mlvl_bboxes, dim=1)
if rescale:
mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
mlvl_scores = torch.cat(mlvl_scores)
batch_mlvl_bboxes /= batch_mlvl_bboxes.new_tensor(
scale_factors).unsqueeze(1)
batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
# Add a dummy background class to the backend when using sigmoid
# remind that we set FG labels to [0, num_class-1] since mmdet v2.0
# BG cat_id: num_class
padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
mlvl_iou_preds = torch.cat(mlvl_iou_preds)
mlvl_nms_scores = (mlvl_scores * mlvl_iou_preds[:, None]).sqrt()
det_bboxes, det_labels = multiclass_nms(
mlvl_bboxes,
mlvl_nms_scores,
cfg.score_thr,
cfg.nms,
cfg.max_per_img,
score_factors=None)
if self.with_score_voting and len(det_bboxes) > 0:
det_bboxes, det_labels = self.score_voting(det_bboxes, det_labels,
mlvl_bboxes,
mlvl_nms_scores,
cfg.score_thr)
return det_bboxes, det_labels
padding = batch_mlvl_scores.new_zeros(batch_size,
batch_mlvl_scores.shape[1], 1)
batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)
batch_mlvl_iou_preds = torch.cat(mlvl_iou_preds, dim=1)
batch_mlvl_nms_scores = (batch_mlvl_scores *
batch_mlvl_iou_preds[..., None]).sqrt()
det_results = []
for (mlvl_bboxes, mlvl_scores) in zip(batch_mlvl_bboxes,
batch_mlvl_nms_scores):
det_bbox, det_label = multiclass_nms(
mlvl_bboxes,
mlvl_scores,
cfg.score_thr,
cfg.nms,
cfg.max_per_img,
score_factors=None)
if self.with_score_voting and len(det_bbox) > 0:
det_bbox, det_label = self.score_voting(
det_bbox, det_label, mlvl_bboxes, mlvl_scores,
cfg.score_thr)
det_results.append(tuple([det_bbox, det_label]))
return det_results
def score_voting(self, det_bboxes, det_labels, mlvl_bboxes,
mlvl_nms_scores, score_thr):
@@ -602,7 +618,7 @@ class PAAHead(ATSSHead):
with shape (num_anchors,4).
mlvl_nms_scores (Tensor): The scores of all boxes which is used
in the NMS procedure, with shape (num_anchors, num_class)
mlvl_iou_preds (Tensot): The predictions of IOU of all boxes
mlvl_iou_preds (Tensor): The predictions of IOU of all boxes
before the NMS procedure, with shape (num_anchors, 1)
score_thr (float): The score threshold of bboxes.
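
The ranking score PAA feeds to NMS above is the geometric mean of the classification score and the predicted IoU; in isolation, with illustrative shapes:

import torch

cls_scores = torch.rand(2, 100, 20)  # (B, num_priors, num_classes)
iou_preds = torch.rand(2, 100)       # (B, num_priors)
# sqrt(cls * iou): a box must score well on both terms to rank highly
nms_scores = (cls_scores * iou_preds[..., None]).sqrt()
print(nms_scores.shape)  # torch.Size([2, 100, 20])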

@@ -79,35 +79,38 @@ class RPNHead(RPNTestMixin, AnchorHead):
return dict(
loss_rpn_cls=losses['loss_cls'], loss_rpn_bbox=losses['loss_bbox'])
def _get_bboxes_single(self,
cls_scores,
bbox_preds,
mlvl_anchors,
img_shape,
scale_factor,
cfg,
rescale=False):
def _get_bboxes(self,
cls_scores,
bbox_preds,
mlvl_anchors,
img_shapes,
scale_factors,
cfg,
rescale=False):
"""Transform outputs for a single batch item into bbox predictions.
Args:
cls_scores (list[Tensor]): Box scores for each scale level
Has shape (num_anchors * num_classes, H, W).
Has shape (N, num_anchors * num_classes, H, W).
bbox_preds (list[Tensor]): Box energies / deltas for each scale
level with shape (num_anchors * 4, H, W).
level with shape (N, num_anchors * 4, H, W).
mlvl_anchors (list[Tensor]): Box reference for each scale level
with shape (num_total_anchors, 4).
img_shape (tuple[int]): Shape of the input image,
img_shapes (list[tuple[int]]): Shape of the input image,
(height, width, 3).
scale_factor (ndarray): Scale factor of the image arange as
scale_factors (list[ndarray]): Scale factor of the image arranged as
(w_scale, h_scale, w_scale, h_scale).
cfg (mmcv.Config): Test / postprocessing configuration,
if None, test_cfg would be used.
rescale (bool): If True, return boxes in original image space.
Returns:
Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
The first item is an (n, 5) tensor, where the first 4 columns
are bounding box positions (tl_x, tl_y, br_x, br_y) and the
5-th column is a score between 0 and 1.
5-th column is a score between 0 and 1. The second item is a
(n,) tensor where each item is the predicted class label of the
corresponding box.
"""
cfg = self.test_cfg if cfg is None else cfg
cfg = copy.deepcopy(cfg)
@@ -117,26 +120,29 @@ class RPNHead(RPNTestMixin, AnchorHead):
mlvl_scores = []
mlvl_bbox_preds = []
mlvl_valid_anchors = []
batch_size = cls_scores[0].shape[0]
nms_pre_tensor = torch.tensor(
cfg.nms_pre, device=cls_scores[0].device, dtype=torch.long)
for idx in range(len(cls_scores)):
rpn_cls_score = cls_scores[idx]
rpn_bbox_pred = bbox_preds[idx]
assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1)
if self.use_sigmoid_cls:
rpn_cls_score = rpn_cls_score.reshape(-1)
rpn_cls_score = rpn_cls_score.reshape(batch_size, -1)
scores = rpn_cls_score.sigmoid()
else:
rpn_cls_score = rpn_cls_score.reshape(-1, 2)
rpn_cls_score = rpn_cls_score.reshape(batch_size, -1, 2)
# We set FG labels to [0, num_class-1] and BG label to
# num_class in RPN head since mmdet v2.5, which is unified to
# be consistent with other head since mmdet v2.0. In mmdet v2.0
# to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
scores = rpn_cls_score.softmax(dim=1)[:, 0]
rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
scores = rpn_cls_score.softmax(-1)[..., 0]
rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).reshape(
batch_size, -1, 4)
anchors = mlvl_anchors[idx]
if cfg.nms_pre > 0:
anchors = anchors.expand_as(rpn_bbox_pred)
if nms_pre_tensor > 0:
# sort is faster than topk
# _, topk_inds = scores.topk(cfg.nms_pre)
# keep topk op for dynamic k in onnx model
@@ -144,43 +150,41 @@ class RPNHead(RPNTestMixin, AnchorHead):
# sort op will be converted to TopK in onnx
# and k<=3480 in TensorRT
scores_shape = torch._shape_as_tensor(scores)
nms_pre = torch.where(scores_shape[0] < nms_pre_tensor,
scores_shape[0], nms_pre_tensor)
nms_pre = torch.where(scores_shape[1] < nms_pre_tensor,
scores_shape[1], nms_pre_tensor)
_, topk_inds = scores.topk(nms_pre)
scores = scores[topk_inds]
rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
anchors = anchors[topk_inds, :]
elif scores.shape[0] > cfg.nms_pre:
batch_inds = torch.arange(batch_size).view(
-1, 1).expand_as(topk_inds)
scores = scores[batch_inds, topk_inds]
rpn_bbox_pred = rpn_bbox_pred[batch_inds, topk_inds, :]
anchors = anchors[batch_inds, topk_inds, :]
elif scores.shape[-1] > cfg.nms_pre:
ranked_scores, rank_inds = scores.sort(descending=True)
topk_inds = rank_inds[:cfg.nms_pre]
scores = ranked_scores[:cfg.nms_pre]
rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
anchors = anchors[topk_inds, :]
topk_inds = rank_inds[:, :cfg.nms_pre]
scores = ranked_scores[:, :cfg.nms_pre]
batch_inds = torch.arange(batch_size).view(
-1, 1).expand_as(topk_inds)
rpn_bbox_pred = rpn_bbox_pred[batch_inds, topk_inds, :]
anchors = anchors[batch_inds, topk_inds, :]
mlvl_scores.append(scores)
mlvl_bbox_preds.append(rpn_bbox_pred)
mlvl_valid_anchors.append(anchors)
level_ids.append(
scores.new_full((scores.size(0), ), idx, dtype=torch.long))
scores = torch.cat(mlvl_scores)
anchors = torch.cat(mlvl_valid_anchors)
rpn_bbox_pred = torch.cat(mlvl_bbox_preds)
proposals = self.bbox_coder.decode(
anchors, rpn_bbox_pred, max_shape=img_shape)
ids = torch.cat(level_ids)
# Skip nonzero op while exporting to ONNX
if cfg.min_bbox_size > 0 and (not torch.onnx.is_in_onnx_export()):
w = proposals[:, 2] - proposals[:, 0]
h = proposals[:, 3] - proposals[:, 1]
valid_inds = torch.nonzero(
(w >= cfg.min_bbox_size)
& (h >= cfg.min_bbox_size),
as_tuple=False).squeeze()
if valid_inds.sum().item() != len(proposals):
proposals = proposals[valid_inds, :]
scores = scores[valid_inds]
ids = ids[valid_inds]
scores.new_full((
batch_size,
scores.size(1),
),
idx,
dtype=torch.long))
batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
batch_mlvl_anchors = torch.cat(mlvl_valid_anchors, dim=1)
batch_mlvl_rpn_bbox_pred = torch.cat(mlvl_bbox_preds, dim=1)
batch_mlvl_proposals = self.bbox_coder.decode(
batch_mlvl_anchors, batch_mlvl_rpn_bbox_pred, max_shape=img_shapes)
batch_mlvl_ids = torch.cat(level_ids, dim=1)
# deprecate arguments warning
if 'nms' not in cfg or 'max_num' in cfg or 'nms_thr' in cfg:
@@ -209,5 +213,24 @@ class RPNHead(RPNTestMixin, AnchorHead):
f' respectively. Please delete the nms_thr ' \
f'which will be deprecated.'
dets, keep = batched_nms(proposals, scores, ids, cfg.nms)
return dets[:cfg.max_per_img]
result_list = []
for (mlvl_proposals, mlvl_scores,
mlvl_ids) in zip(batch_mlvl_proposals, batch_mlvl_scores,
batch_mlvl_ids):
# Skip nonzero op while exporting to ONNX
if cfg.min_bbox_size > 0 and (not torch.onnx.is_in_onnx_export()):
w = mlvl_proposals[:, 2] - mlvl_proposals[:, 0]
h = mlvl_proposals[:, 3] - mlvl_proposals[:, 1]
valid_ind = torch.nonzero(
(w >= cfg.min_bbox_size)
& (h >= cfg.min_bbox_size),
as_tuple=False).squeeze()
if valid_ind.sum().item() != len(mlvl_proposals):
mlvl_proposals = mlvl_proposals[valid_ind, :]
mlvl_scores = mlvl_scores[valid_ind]
mlvl_ids = mlvl_ids[valid_ind]
dets, keep = batched_nms(mlvl_proposals, mlvl_scores, mlvl_ids,
cfg.nms)
result_list.append(dets[:cfg.max_per_img])
return result_list
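
The sort-versus-topk split above exists because the two paths serve different backends while selecting the same candidates; a distilled sketch of the branch:

import torch

scores = torch.rand(2, 5000)  # (B, num_anchors), illustrative
nms_pre = 1000
if torch.onnx.is_in_onnx_export():
    # an explicit topk keeps k visible in the exported graph;
    # TensorRT only supports TopK with k <= 3480
    _, topk_inds = scores.topk(nms_pre)
else:
    # eager path: sort is faster than topk here
    ranked_scores, rank_inds = scores.sort(descending=True)
    topk_inds = rank_inds[:, :nms_pre]
print(topk_inds.shape)  # torch.Size([2, 1000])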

@@ -191,36 +191,34 @@ class YOLOV3Head(BaseDenseHead, BBoxTestMixin):
Returns:
list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
The first item is an (n, 5) tensor, where the first 4 columns
are bounding box positions (tl_x, tl_y, br_x, br_y) and the
5-th column is a score between 0 and 1. The second item is a
(n,) tensor where each item is the predicted class label of the
corresponding box.
The first item is an (n, 5) tensor, where the 5 columns represent
(tl_x, tl_y, br_x, br_y, score) and the score is between 0 and 1.
The shape of the second tensor in the tuple is (n,), and
each element represents the class label of the corresponding
box.
"""
result_list = []
num_levels = len(pred_maps)
for img_id in range(len(img_metas)):
pred_maps_list = [
pred_maps[i][img_id].detach() for i in range(num_levels)
]
scale_factor = img_metas[img_id]['scale_factor']
proposals = self._get_bboxes_single(pred_maps_list, scale_factor,
cfg, rescale, with_nms)
result_list.append(proposals)
pred_maps_list = [pred_maps[i].detach() for i in range(num_levels)]
scale_factors = [
img_metas[i]['scale_factor']
for i in range(pred_maps_list[0].shape[0])
]
result_list = self._get_bboxes(pred_maps_list, scale_factors, cfg,
rescale, with_nms)
return result_list
def _get_bboxes_single(self,
pred_maps_list,
scale_factor,
cfg,
rescale=False,
with_nms=True):
def _get_bboxes(self,
pred_maps_list,
scale_factors,
cfg,
rescale=False,
with_nms=True):
"""Transform outputs for a single batch item into bbox predictions.
Args:
pred_maps_list (list[Tensor]): Prediction maps for different scales
of each single image in the batch.
scale_factor (ndarray): Scale factor of the image arrange as
scale_factors (list[ndarray]): Scale factor of the image arranged as
(w_scale, h_scale, w_scale, h_scale).
cfg (mmcv.Config | None): Test / postprocessing configuration,
if None, test_cfg would be used.
@@ -230,62 +228,71 @@ class YOLOV3Head(BaseDenseHead, BBoxTestMixin):
Default: True.
Returns:
tuple(Tensor):
det_bboxes (Tensor): BBox predictions in shape (n, 5), where
the first 4 columns are bounding box positions
(tl_x, tl_y, br_x, br_y) and the 5-th column is a score
between 0 and 1.
det_labels (Tensor): A (n,) tensor where each item is the
predicted class label of the corresponding box.
list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
The first item is an (n, 5) tensor, where the 5 columns represent
(tl_x, tl_y, br_x, br_y, score) and the score is between 0 and 1.
The shape of the second tensor in the tuple is (n,), and
each element represents the class label of the corresponding
box.
"""
cfg = self.test_cfg if cfg is None else cfg
assert len(pred_maps_list) == self.num_levels
multi_lvl_bboxes = []
multi_lvl_cls_scores = []
multi_lvl_conf_scores = []
num_levels = len(pred_maps_list)
device = pred_maps_list[0].device
batch_size = pred_maps_list[0].shape[0]
featmap_sizes = [
pred_maps_list[i].shape[-2:] for i in range(num_levels)
pred_maps_list[i].shape[-2:] for i in range(self.num_levels)
]
multi_lvl_anchors = self.anchor_generator.grid_anchors(
featmap_sizes, pred_maps_list[0][0].device)
featmap_sizes, device)
# convert to tensor to keep tracing
nms_pre_tensor = torch.tensor(
cfg.get('nms_pre', -1), device=device, dtype=torch.long)
multi_lvl_bboxes = []
multi_lvl_cls_scores = []
multi_lvl_conf_scores = []
for i in range(self.num_levels):
# get some key info for current scale
pred_map = pred_maps_list[i]
stride = self.featmap_strides[i]
# (h, w, num_anchors*num_attrib) -> (h*w*num_anchors, num_attrib)
pred_map = pred_map.permute(1, 2, 0).reshape(-1, self.num_attrib)
pred_map[..., :2] = torch.sigmoid(pred_map[..., :2])
bbox_pred = self.bbox_coder.decode(multi_lvl_anchors[i],
pred_map[..., :4], stride)
# (b,h, w, num_anchors*num_attrib) ->
# (b,h*w*num_anchors, num_attrib)
pred_map = pred_map.permute(0, 2, 3,
1).reshape(batch_size, -1,
self.num_attrib)
# Inplace operation like
# ```pred_map[..., :2] = torch.sigmoid(pred_map[..., :2])```
# would create a constant tensor when exporting to ONNX
pred_map_conf = torch.sigmoid(pred_map[..., :2])
pred_map_rest = pred_map[..., 2:]
pred_map = torch.cat([pred_map_conf, pred_map_rest], dim=-1)
pred_map_boxes = pred_map[..., :4]
multi_lvl_anchor = multi_lvl_anchors[i]
multi_lvl_anchor = multi_lvl_anchor.expand_as(pred_map_boxes)
bbox_pred = self.bbox_coder.decode(multi_lvl_anchor,
pred_map_boxes, stride)
# conf and cls
conf_pred = torch.sigmoid(pred_map[..., 4]).view(-1)
conf_pred = torch.sigmoid(pred_map[..., 4])
cls_pred = torch.sigmoid(pred_map[..., 5:]).view(
-1, self.num_classes) # Cls pred one-hot.
# Filtering out all predictions with conf < conf_thr
conf_thr = cfg.get('conf_thr', -1)
if conf_thr > 0 and (not torch.onnx.is_in_onnx_export()):
# TensorRT does not support NonZero
# add as_tuple=False for compatibility in Pytorch 1.6
# flatten would create a Reshape op with constant values,
# and raise RuntimeError when doing inference in ONNX Runtime
# with a different input image (#4221).
conf_inds = conf_pred.ge(conf_thr).nonzero(
as_tuple=False).squeeze(1)
bbox_pred = bbox_pred[conf_inds, :]
cls_pred = cls_pred[conf_inds, :]
conf_pred = conf_pred[conf_inds]
batch_size, -1, self.num_classes) # Cls pred one-hot.
# Get top-k prediction
nms_pre = cfg.get('nms_pre', -1)
if 0 < nms_pre < conf_pred.size(0):
# Always keep topk op for dynamic input in onnx (see the sketch after this hunk)
if nms_pre_tensor > 0 and (torch.onnx.is_in_onnx_export()
or conf_pred.shape[1] > nms_pre_tensor):
from torch import _shape_as_tensor
# keep shape as tensor and get k
num_anchor = _shape_as_tensor(conf_pred)[1].to(device)
nms_pre = torch.where(nms_pre_tensor < num_anchor,
nms_pre_tensor, num_anchor)
_, topk_inds = conf_pred.topk(nms_pre)
bbox_pred = bbox_pred[topk_inds, :]
cls_pred = cls_pred[topk_inds, :]
conf_pred = conf_pred[topk_inds]
batch_inds = torch.arange(batch_size).view(
-1, 1).expand_as(topk_inds).long()
bbox_pred = bbox_pred[batch_inds, topk_inds, :]
cls_pred = cls_pred[batch_inds, topk_inds, :]
conf_pred = conf_pred[batch_inds, topk_inds]
# Save the result of the current scale
multi_lvl_bboxes.append(bbox_pred)
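Two export tricks in the hunk above are easy to miss, so here is a minimal standalone sketch of both (hypothetical helper names and dummy tensors, not part of YOLOV3Head): the out-of-place sigmoid that replaces the in-place slice assignment, and the tensor-valued top-k that keeps a real TopK node in the traced graph.

```python
import torch
from torch import _shape_as_tensor


def sigmoid_xy(pred_map):
    # Out-of-place stand-in for `pred_map[..., :2] = torch.sigmoid(...)`:
    # the in-place slice assignment is traced into a constant tensor during
    # ONNX export, so a new tensor is built with torch.cat instead.
    xy = torch.sigmoid(pred_map[..., :2])
    return torch.cat([xy, pred_map[..., 2:]], dim=-1)


def batched_topk(conf_pred, bbox_pred, nms_pre_tensor):
    # conf_pred: (B, N), bbox_pred: (B, N, 4), nms_pre_tensor: 0-dim long
    # tensor with nms_pre > 0 (the head guards this before calling topk).
    # Clamping k with torch.where on tensors rather than Python ints keeps
    # the TopK op dynamic, so one exported graph serves inputs with a
    # different number of anchors.
    num_anchor = _shape_as_tensor(conf_pred)[1].to(conf_pred.device)
    nms_pre = torch.where(nms_pre_tensor < num_anchor, nms_pre_tensor,
                          num_anchor)
    _, topk_inds = conf_pred.topk(nms_pre)
    # Pair every image index with its own top-k anchor indices.
    batch_inds = torch.arange(conf_pred.size(0)).view(-1, 1).expand_as(
        topk_inds)
    return conf_pred[batch_inds, topk_inds], bbox_pred[batch_inds,
                                                       topk_inds, :]
```

For example, `batched_topk(torch.rand(2, 3000), torch.rand(2, 3000, 4), torch.tensor(1000))` keeps the 1000 highest-confidence anchors per image.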
@@ -293,43 +300,70 @@ class YOLOV3Head(BaseDenseHead, BBoxTestMixin):
multi_lvl_conf_scores.append(conf_pred)
# Merge the results of different scales together
multi_lvl_bboxes = torch.cat(multi_lvl_bboxes)
multi_lvl_cls_scores = torch.cat(multi_lvl_cls_scores)
multi_lvl_conf_scores = torch.cat(multi_lvl_conf_scores)
batch_mlvl_bboxes = torch.cat(multi_lvl_bboxes, dim=1)
batch_mlvl_scores = torch.cat(multi_lvl_cls_scores, dim=1)
batch_mlvl_conf_scores = torch.cat(multi_lvl_conf_scores, dim=1)
# Set max number of boxes to be fed into nms in deployment
deploy_nms_pre = cfg.get('deploy_nms_pre', -1)
if deploy_nms_pre > 0 and torch.onnx.is_in_onnx_export():
_, topk_inds = multi_lvl_conf_scores.topk(deploy_nms_pre)
multi_lvl_bboxes = multi_lvl_bboxes[topk_inds, :]
multi_lvl_cls_scores = multi_lvl_cls_scores[topk_inds, :]
multi_lvl_conf_scores = multi_lvl_conf_scores[topk_inds]
if with_nms and (multi_lvl_conf_scores.size(0) == 0):
_, topk_inds = batch_mlvl_conf_scores.topk(deploy_nms_pre)
batch_inds = torch.arange(batch_size).view(
-1, 1).expand_as(topk_inds).long()
batch_mlvl_bboxes = batch_mlvl_bboxes[batch_inds, topk_inds, :]
batch_mlvl_scores = batch_mlvl_scores[batch_inds, topk_inds, :]
batch_mlvl_conf_scores = batch_mlvl_conf_scores[batch_inds,
topk_inds]
if with_nms and (batch_mlvl_conf_scores.size(0) == 0):
return torch.zeros((0, 5)), torch.zeros((0, ))
if rescale:
multi_lvl_bboxes /= multi_lvl_bboxes.new_tensor(scale_factor)
batch_mlvl_bboxes /= batch_mlvl_bboxes.new_tensor(
scale_factors).unsqueeze(1)
# In mmdet 2.x, the class_id for background is num_classes,
# i.e. the last column (see the padding sketch after this diff).
padding = multi_lvl_cls_scores.new_zeros(multi_lvl_cls_scores.shape[0],
1)
multi_lvl_cls_scores = torch.cat([multi_lvl_cls_scores, padding],
dim=1)
padding = batch_mlvl_scores.new_zeros(batch_size,
batch_mlvl_scores.shape[1], 1)
batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)
# Support exporting to onnx without nms
if with_nms and cfg.get('nms', None) is not None:
det_bboxes, det_labels = multiclass_nms(
multi_lvl_bboxes,
multi_lvl_cls_scores,
cfg.score_thr,
cfg.nms,
cfg.max_per_img,
score_factors=multi_lvl_conf_scores)
return det_bboxes, det_labels
det_results = []
for (mlvl_bboxes, mlvl_scores,
mlvl_conf_scores) in zip(batch_mlvl_bboxes, batch_mlvl_scores,
batch_mlvl_conf_scores):
# Filtering out all predictions with conf < conf_thr
conf_thr = cfg.get('conf_thr', -1)
if conf_thr > 0 and (not torch.onnx.is_in_onnx_export()):
# TensorRT does not support NonZero
# add as_tuple=False for compatibility in Pytorch 1.6
# flatten would create a Reshape op with constant values,
# and raise RuntimeError when doing inference in ONNX
# Runtime with a different input image (#4221).
conf_inds = mlvl_conf_scores.ge(conf_thr).nonzero(
as_tuple=False).squeeze(1)
mlvl_bboxes = mlvl_bboxes[conf_inds, :]
mlvl_scores = mlvl_scores[conf_inds, :]
mlvl_conf_scores = mlvl_conf_scores[conf_inds]
det_bboxes, det_labels = multiclass_nms(
mlvl_bboxes,
mlvl_scores,
cfg.score_thr,
cfg.nms,
cfg.max_per_img,
score_factors=mlvl_conf_scores)
det_results.append(tuple([det_bboxes, det_labels]))
else:
return (multi_lvl_bboxes, multi_lvl_cls_scores,
multi_lvl_conf_scores)
det_results = [
tuple(mlvl_bs)
for mlvl_bs in zip(batch_mlvl_bboxes, batch_mlvl_scores,
batch_mlvl_conf_scores)
]
return det_results
@force_fp32(apply_to=('pred_maps', ))
def loss(self,

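The zero padding above follows the mmdet 2.x score layout, where the background class sits in the last column and multiclass_nms expects that column to be present. A small self-contained check of the batched padding (dummy shapes, not tied to the head):

```python
import torch

# mmdet 2.x keeps background as class index num_classes, i.e. the last
# score column; the head therefore appends an all-zero background column
# for every image in the batch before calling multiclass_nms.
batch_size, num_anchors, num_classes = 2, 100, 80
batch_mlvl_scores = torch.rand(batch_size, num_anchors, num_classes)
padding = batch_mlvl_scores.new_zeros(batch_size, num_anchors, 1)
batch_mlvl_scores = torch.cat([batch_mlvl_scores, padding], dim=-1)
assert batch_mlvl_scores.shape == (batch_size, num_anchors, num_classes + 1)
```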
@@ -97,10 +97,10 @@ def test_paa_head_loss():
assert len(results) == n
assert results[0].size() == (h * w * 5, c)
assert self.with_score_voting
cls_scores = [torch.ones(4, 5, 5)]
bbox_preds = [torch.ones(4, 5, 5)]
iou_preds = [torch.ones(1, 5, 5)]
mlvl_anchors = [torch.ones(5 * 5, 4)]
cls_scores = [torch.ones(2, 4, 5, 5)]
bbox_preds = [torch.ones(2, 4, 5, 5)]
iou_preds = [torch.ones(2, 1, 5, 5)]
mlvl_anchors = [torch.ones(2, 5 * 5, 4)]
img_shape = None
scale_factor = [0.5, 0.5]
cfg = mmcv.Config(
@@ -111,7 +111,7 @@ def test_paa_head_loss():
nms=dict(type='nms', iou_threshold=0.6),
max_per_img=100))
rescale = False
self._get_bboxes_single(
self._get_bboxes(
cls_scores,
bbox_preds,
iou_preds,

@@ -137,7 +137,7 @@ def test_rpn_forward():
'retinanet/retinanet_r50_fpn_1x_coco.py',
'guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py',
'ghm/retinanet_ghm_r50_fpn_1x_coco.py',
'fcos/fcos_center_r50_caffe_fpn_gn-head_4x4_1x_coco.py',
'fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py',
'foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py',
# 'free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py',
# 'atss/atss_r50_fpn_1x_coco.py', # not ready for topk

@@ -2,6 +2,7 @@ import numpy as np
import pytest
import torch
from mmdet.core.bbox import distance2bbox
from mmdet.core.mask.structures import BitmapMasks, PolygonMasks
from mmdet.core.utils import mask2ndarray
@@ -45,3 +46,47 @@ def test_mask2ndarray():
raw_masks = []
with pytest.raises(TypeError):
output_mask = mask2ndarray(raw_masks)
def test_distance2bbox():
point = torch.Tensor([[74., 61.], [-29., 106.], [138., 61.], [29., 170.]])
distance = torch.Tensor([[0., 0, 1., 1.], [1., 2., 10., 6.],
[22., -29., 138., 61.], [54., -29., 170., 61.]])
expected_decode_bboxes = torch.Tensor([[74., 61., 75., 62.],
[0., 104., 0., 112.],
[100., 90., 100., 120.],
[0., 120., 100., 120.]])
out_bbox = distance2bbox(point, distance, max_shape=(120, 100))
assert expected_decode_bboxes.allclose(out_bbox)
out = distance2bbox(point, distance, max_shape=torch.Tensor((120, 100)))
assert expected_decode_bboxes.allclose(out)
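# batched points/distances must decode to the same boxes per image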
batch_point = point.unsqueeze(0).repeat(2, 1, 1)
batch_distance = distance.unsqueeze(0).repeat(2, 1, 1)
batch_out = distance2bbox(
batch_point, batch_distance, max_shape=(120, 100))[0]
assert out.allclose(batch_out)
batch_out = distance2bbox(
batch_point, batch_distance, max_shape=[(120, 100), (120, 100)])[0]
assert out.allclose(batch_out)
batch_out = distance2bbox(point, batch_distance, max_shape=(120, 100))[0]
assert out.allclose(batch_out)
# test max_shape is not equal to batch
with pytest.raises(AssertionError):
distance2bbox(
batch_point,
batch_distance,
max_shape=[(120, 100), (120, 100), (32, 32)])
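# zero-box inputs should pass through with their shapes preserved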
rois = torch.zeros((0, 4))
deltas = torch.zeros((0, 4))
out = distance2bbox(rois, deltas, max_shape=(120, 100))
assert rois.shape == out.shape
rois = torch.zeros((2, 0, 4))
deltas = torch.zeros((2, 0, 4))
out = distance2bbox(rois, deltas, max_shape=(120, 100))
assert rois.shape == out.shape
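These assertions cover the batched path of distance2bbox in mmdet/core/bbox/transforms.py; `pytest tests/test_utils/test_misc.py -k distance2bbox` runs them in isolation.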
