You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
269 lines
11 KiB
269 lines
11 KiB
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. |
# |
# Licensed under the Apache License, Version 2.0 (the "License"); |
# you may not use this file except in compliance with the License. |
# You may obtain a copy of the License at |
# |
# |
# |
# Unless required by applicable law or agreed to in writing, software |
# distributed under the License is distributed on an "AS IS" BASIS, |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
# See the License for the specific language governing permissions and |
# limitations under the License. |
# The code is based on: |
# |
from __future__ import absolute_import |
from __future__ import division |
from __future__ import print_function |
import numpy as np |
from paddlers.models.ppdet.utils.logger import setup_logger |
logger = setup_logger(__name__) |
def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
    """Calculate overlap between two sets of bboxes.

    If ``is_aligned`` is ``False``, the overlap is computed between every
    bbox of ``bboxes1`` and every bbox of ``bboxes2``; otherwise it is
    computed between each aligned pair of ``bboxes1`` and ``bboxes2``.

    Args:
        bboxes1 (np.ndarray): shape (B, m, 4) in <x1, y1, x2, y2> format or
            empty.
        bboxes2 (np.ndarray): shape (B, n, 4) in <x1, y1, x2, y2> format or
            empty. B indicates the batch dim, in shape (B1, B2, ..., Bn).
            If ``is_aligned`` is ``True``, then m and n must be equal.
        mode (str): "iou" (intersection over union), "iof" (intersection
            over foreground, i.e. over the area of ``bboxes1``) or "giou"
            (generalized intersection over union).
        is_aligned (bool, optional): If True, then m and n must be equal.
            Default False.
        eps (float, optional): Lower bound applied to the denominators for
            numerical stability. Default 1e-6.

    Returns:
        np.ndarray: shape (B, m, n) if ``is_aligned`` is False else
        shape (B, m). When either input holds no boxes an empty array of
        the matching shape is returned.
    """
    assert mode in ['iou', 'iof', 'giou'], 'Unsupported mode {}'.format(mode)
    # Either the boxes are empty or the length of the last dimension is 4
    assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
    assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)

    # Batch dims (B1, B2, ... Bn) must be the same for both inputs
    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
    batch_shape = tuple(bboxes1.shape[:-2])

    rows = bboxes1.shape[-2] if bboxes1.shape[0] > 0 else 0
    cols = bboxes2.shape[-2] if bboxes2.shape[0] > 0 else 0
    if is_aligned:
        assert rows == cols

    if rows * cols == 0:
        # Nothing to compare: the result has zero elements, so allocate it
        # deterministically instead of going through the random generator.
        tail = (rows, ) if is_aligned else (rows, cols)
        return np.zeros(batch_shape + tail)

    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
        bboxes1[..., 3] - bboxes1[..., 1])
    area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
        bboxes2[..., 3] - bboxes2[..., 1])

    if is_aligned:
        boxes_a, boxes_b = bboxes1, bboxes2  # [B, rows, 4]
    else:
        # Insert singleton axes so that every arithmetic step below
        # broadcasts to the pairwise [B, rows, cols, ...] layout.
        boxes_a = bboxes1[..., :, None, :]  # [B, rows, 1, 4]
        boxes_b = bboxes2[..., None, :, :]  # [B, 1, cols, 4]
        area1 = area1[..., None]  # [B, rows, 1]
        area2 = area2[..., None, :]  # [B, 1, cols]

    lt = np.maximum(boxes_a[..., :2], boxes_b[..., :2])
    rb = np.minimum(boxes_a[..., 2:], boxes_b[..., 2:])
    wh = (rb - lt).clip(min=0)
    overlap = wh[..., 0] * wh[..., 1]

    if mode == 'iof':
        union = area1
    else:
        union = area1 + area2 - overlap

    eps = np.array([eps])
    union = np.maximum(union, eps)
    ious = overlap / union
    if mode != 'giou':
        return ious

    # GIoU subtracts the share of the smallest enclosing box that is not
    # covered by the union of the two boxes.
    enclosed_lt = np.minimum(boxes_a[..., :2], boxes_b[..., :2])
    enclosed_rb = np.maximum(boxes_a[..., 2:], boxes_b[..., 2:])
    enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
    enclose_area = np.maximum(enclose_area, eps)
    gious = ious - (enclose_area - union) / enclose_area
    return gious
def topk_(input, k, axis=1, largest=True):
    """Select the ``k`` largest or smallest entries of ``input`` along ``axis``.

    The selected entries are returned best-first: descending when
    ``largest`` is True, ascending otherwise.

    Args:
        input (np.ndarray): Array to select from.
        k (int): Number of entries to keep along ``axis``. A ``k`` greater
            than or equal to the length of ``axis`` selects the whole axis.
        axis (int, optional): Axis along which to select. Default 1.
        largest (bool, optional): Select the largest entries if True, the
            smallest otherwise. Default True.

    Returns:
        tuple(np.ndarray, np.ndarray): The selected values and their indices
        along ``axis``. Both have the shape of ``input`` with ``axis``
        reduced to the number of selected entries.
    """
    # Negating turns "k largest" into "k smallest", so a single ascending
    # code path serves both modes.
    keys = -input if largest else input
    axis_len = keys.shape[axis]

    if k >= axis_len:
        # np.argpartition rejects kth == axis length; with every entry
        # selected a plain sort is all that is needed.
        topk_index = np.argsort(keys, axis=axis)
    else:
        partitioned = np.argpartition(keys, k, axis=axis)
        candidates = np.take(partitioned, np.arange(k), axis=axis)
        candidate_keys = np.take_along_axis(keys, candidates, axis=axis)
        # argpartition leaves the first k entries unordered, so sort them.
        order = np.argsort(candidate_keys, axis=axis)
        topk_index = np.take_along_axis(candidates, order, axis=axis)

    # Gather from the untouched input so the values are never left negated.
    topk_data = np.take_along_axis(input, topk_index, axis=axis)
    return topk_data, topk_index
class ATSSAssigner(object):
    """Assign a corresponding gt bbox or background to each bbox.

    Each proposal is assigned `0` or a positive integer indicating the
    ground truth index.

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        topk (int): number of bboxes selected on each pyramid level for
            every gt. Default 9.
    """

    def __init__(self, topk=9):
        self.topk = topk

    def __call__(self,
                 bboxes,
                 num_level_bboxes,
                 gt_bboxes,
                 gt_bboxes_ignore=None,
                 gt_labels=None):
        """Assign gt to bboxes.

        The assignment is done in the following steps:

        1. compute iou between all bboxes (of all pyramid levels) and gts
        2. compute center distance between all bboxes and gts
        3. on each pyramid level, for each gt, select the k bboxes whose
           centers are closest to the gt center, giving k*l candidates per
           gt in total
        4. take the iou of these candidates, compute its mean and std per
           gt and use mean + std as the iou threshold
        5. keep the candidates whose iou is greater than or equal to the
           threshold as positives
        6. keep only positives whose center lies inside the gt

        Args:
            bboxes (np.array): Bounding boxes to be assigned, shape (n, 4).
                Only the first four columns are used.
            num_level_bboxes (List): num of bboxes in each level.
            gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
            gt_bboxes_ignore (np.array, optional): Ground truth bboxes that
                are labelled as `ignored`, e.g., crowd boxes in COCO.
                Accepted for interface compatibility; not used here.
            gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
                Accepted for interface compatibility; it does not affect
                the returned values.

        Returns:
            tuple(np.array, np.array): ``assigned_gt_inds`` of shape (n, )
            with the 1-based gt index of every bbox (0 for background), and
            ``max_overlaps`` of shape (n, ) with the iou between every bbox
            and its assigned gt. ``max_overlaps`` is ``-inf`` for bboxes
            left as background, and all zeros when there are no gts or no
            bboxes.
        """
        bboxes = bboxes[:, :4]
        num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]

        # assign 0 (background) by default
        assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)

        if num_gt == 0 or num_bboxes == 0:
            # No ground truth or no boxes: everything stays background
            return assigned_gt_inds, np.zeros((num_bboxes, ))

        # compute iou between all bboxes and gts, shape (n, k)
        overlaps = bbox_overlaps(bboxes, gt_bboxes)

        # compute center distance between all bboxes and gts, shape (n, k)
        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
        gt_points = np.stack((gt_cx, gt_cy), axis=1)

        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
        bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)

        distances = np.sqrt(
            np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
            .sum(-1))

        # candidate_idxs[c, g] is the bbox index of the c-th candidate of
        # gt g
        candidate_idxs = self._select_candidates(distances, num_level_bboxes)
        gt_cols = np.arange(num_gt)

        # take the iou of the candidates and use mean + std of each gt as
        # its iou threshold
        candidate_overlaps = overlaps[candidate_idxs, gt_cols]
        overlaps_thr_per_gt = (
            candidate_overlaps.mean(0) + candidate_overlaps.std(0))
        is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]

        # limit the positive sample's center to lie inside the gt: compute
        # the left, top, right, bottom distance between each candidate
        # center and the sides of its gt
        cand_cx = bboxes_cx[candidate_idxs]
        cand_cy = bboxes_cy[candidate_idxs]
        l_ = cand_cx - gt_bboxes[:, 0]
        t_ = cand_cy - gt_bboxes[:, 1]
        r_ = gt_bboxes[:, 2] - cand_cx
        b_ = gt_bboxes[:, 3] - cand_cy
        is_in_gts = np.stack([l_, t_, r_, b_], axis=1).min(axis=1) > 0.01
        is_pos = is_pos & is_in_gts

        # if an anchor box is assigned to multiple gts, the one with the
        # highest IoU is selected: keep the iou of positive (bbox, gt)
        # pairs and mask every other pair with -inf before the argmax
        pos_bboxes = candidate_idxs[is_pos]
        pos_gts = np.broadcast_to(gt_cols, candidate_idxs.shape)[is_pos]
        overlaps_inf = np.full_like(overlaps, -np.inf)
        overlaps_inf[pos_bboxes, pos_gts] = overlaps[pos_bboxes, pos_gts]

        max_overlaps = overlaps_inf.max(axis=1)
        argmax_overlaps = overlaps_inf.argmax(axis=1)
        has_gt = max_overlaps != -np.inf
        assigned_gt_inds[has_gt] = argmax_overlaps[has_gt] + 1

        return assigned_gt_inds, max_overlaps

    def _select_candidates(self, distances, num_level_bboxes):
        """Pick, per level and per gt, the bboxes closest to the gt center.

        Args:
            distances (np.array): Center distances, shape (n, k).
            num_level_bboxes (List): num of bboxes in each level.

        Returns:
            np.array: Indices into the n bboxes, shape (c, k) where c is the
            total number of candidates selected over all levels.
        """
        candidate_idxs = []
        start_idx = 0
        for bboxes_per_level in num_level_bboxes:
            end_idx = start_idx + bboxes_per_level
            distances_per_level = distances[start_idx:end_idx, :]
            selectable_k = min(self.topk, bboxes_per_level)
            if selectable_k < bboxes_per_level:
                _, topk_idxs_per_level = topk_(
                    distances_per_level, selectable_k, axis=0, largest=False)
            else:
                # Every bbox of this level is a candidate; a full sort
                # yields the same closest-first order without needing a
                # partition step.
                topk_idxs_per_level = np.argsort(distances_per_level, axis=0)
            # shift level-local indices to indices into all bboxes
            candidate_idxs.append(topk_idxs_per_level + start_idx)
            start_idx = end_idx
        return np.concatenate(candidate_idxs, axis=0)