OpenMMLab Detection Toolbox and Benchmark
https://mmdetection.readthedocs.io/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
469 lines
19 KiB
469 lines
19 KiB
from multiprocessing import Pool |
|
|
|
import mmcv |
|
import numpy as np |
|
from mmcv.utils import print_log |
|
from terminaltables import AsciiTable |
|
|
|
from .bbox_overlaps import bbox_overlaps |
|
from .class_names import get_classes |
|
|
|
|
|
def average_precision(recalls, precisions, mode='area'):
    """Calculate average precision (for single or multiple scales).

    Args:
        recalls (ndarray | sequence): shape (num_scales, num_dets) or
            (num_dets, ). Array-likes are converted with ``np.asarray``.
        precisions (ndarray | sequence): same shape as ``recalls``.
        mode (str): 'area' or '11points'. 'area' means calculating the area
            under the precision-recall curve, '11points' means calculating
            the average precision of recalls at [0, 0.1, ..., 1].

    Returns:
        float or ndarray: calculated average precision. A scalar is returned
            when the inputs are 1-D, otherwise an array of shape
            (num_scales, ).

    Raises:
        ValueError: If ``mode`` is neither 'area' nor '11points'.
    """
    recalls = np.asarray(recalls)
    precisions = np.asarray(precisions)

    # 1-D input is treated as a single scale and unwrapped again on return.
    no_scale = recalls.ndim == 1
    if no_scale:
        recalls = recalls[np.newaxis, :]
        precisions = precisions[np.newaxis, :]
    assert recalls.shape == precisions.shape and recalls.ndim == 2

    num_scales = recalls.shape[0]
    ap = np.zeros(num_scales, dtype=np.float32)
    if mode == 'area':
        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)
        ones = np.ones((num_scales, 1), dtype=recalls.dtype)
        # pad the curve with sentinel points at recall 0 and recall 1
        mrec = np.hstack((zeros, recalls, ones))
        mpre = np.hstack((zeros, precisions, zeros))
        # make precision monotonically non-increasing: every entry becomes
        # the maximum of itself and everything to its right
        mpre = np.maximum.accumulate(mpre[:, ::-1], axis=1)[:, ::-1]
        for i in range(num_scales):
            # only positions where recall changes contribute to the area
            ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]
            ap[i] = np.sum(
                (mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])
    elif mode == '11points':
        for i in range(num_scales):
            for thr in np.arange(0, 1 + 1e-3, 0.1):
                precs = precisions[i, recalls[i, :] >= thr]
                prec = precs.max() if precs.size > 0 else 0
                ap[i] += prec
        # normalise once, after every scale has been accumulated
        ap /= 11
    else:
        raise ValueError(
            'Unrecognized mode, only "area" and "11points" are supported')
    if no_scale:
        ap = ap[0]
    return ap
|
|
|
|
|
def tpfp_imagenet(det_bboxes,
                  gt_bboxes,
                  gt_bboxes_ignore=None,
                  default_iou_thr=0.5,
                  area_ranges=None):
    """Check if detected bboxes are true positive or false positive.

    Args:
        det_bboxes (ndarray): Detected bboxes of this image, of shape (m, 5).
            The last column is used as the score for ranking.
        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).
        gt_bboxes_ignore (ndarray | None): Ignored gt bboxes of this image,
            of shape (k, 4). ``None`` is treated as "no ignored boxes".
            Default: None
        default_iou_thr (float): IoU threshold to be considered as matched for
            medium and large bboxes (small ones have special rules).
            Default: 0.5.
        area_ranges (list[tuple] | None): Range of bbox areas to be evaluated,
            in the format [(min1, max1), (min2, max2), ...]. Default: None.

    Returns:
        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of
            each array is (num_scales, m).
    """
    # The signature advertises None, so it must not crash on `.shape`.
    if gt_bboxes_ignore is None:
        gt_bboxes_ignore = np.empty((0, 4), dtype=gt_bboxes.dtype)
    # an indicator of ignored gts
    gt_ignore_inds = np.concatenate(
        (np.zeros(gt_bboxes.shape[0], dtype=bool),
         np.ones(gt_bboxes_ignore.shape[0], dtype=bool)))
    # stack gt_bboxes and gt_bboxes_ignore for convenience
    gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))

    num_dets = det_bboxes.shape[0]
    num_gts = gt_bboxes.shape[0]
    if area_ranges is None:
        area_ranges = [(None, None)]
    num_scales = len(area_ranges)
    # tp and fp are of shape (num_scales, num_dets), each row is tp or fp
    # of a certain scale.
    tp = np.zeros((num_scales, num_dets), dtype=np.float32)
    fp = np.zeros((num_scales, num_dets), dtype=np.float32)

    # with no gt at all, every det inside the area range is a false positive
    if gt_bboxes.shape[0] == 0:
        if area_ranges == [(None, None)]:
            fp[...] = 1
        else:
            det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0]) * (
                det_bboxes[:, 3] - det_bboxes[:, 1])
            for i, (min_area, max_area) in enumerate(area_ranges):
                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
        return tp, fp

    # NOTE(review): the gt boxes are shifted by -1 before computing IoU;
    # presumably this follows the ImageNet devkit convention - confirm.
    ious = bbox_overlaps(det_bboxes, gt_bboxes - 1)
    gt_w = gt_bboxes[:, 2] - gt_bboxes[:, 0]
    gt_h = gt_bboxes[:, 3] - gt_bboxes[:, 1]
    gt_areas = gt_w * gt_h
    # per-gt threshold: relaxed for small boxes, capped at default_iou_thr
    iou_thrs = np.minimum(gt_areas / ((gt_w + 10.0) * (gt_h + 10.0)),
                          default_iou_thr)
    # sort all detections by scores in descending order
    sort_inds = np.argsort(-det_bboxes[:, -1])
    for k, (min_area, max_area) in enumerate(area_ranges):
        gt_covered = np.zeros(num_gts, dtype=bool)
        # if no area range is specified, gt_area_ignore is all False
        if min_area is None:
            gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)
        else:
            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
        for i in sort_inds:
            max_iou = -1
            matched_gt = -1
            # find best overlapped available gt
            for j in range(num_gts):
                # different from PASCAL VOC: allow finding other gts if the
                # best overlapped ones are already matched by other det bboxes
                if gt_covered[j]:
                    continue
                elif ious[i, j] >= iou_thrs[j] and ious[i, j] > max_iou:
                    max_iou = ious[i, j]
                    matched_gt = j
            # there are 4 cases for a det bbox:
            # 1. it matches a gt, tp = 1, fp = 0
            # 2. it matches an ignored gt, tp = 0, fp = 0
            # 3. it matches no gt and within area range, tp = 0, fp = 1
            # 4. it matches no gt but is beyond area range, tp = 0, fp = 0
            if matched_gt >= 0:
                gt_covered[matched_gt] = True
                if not (gt_ignore_inds[matched_gt]
                        or gt_area_ignore[matched_gt]):
                    tp[k, i] = 1
            elif min_area is None:
                fp[k, i] = 1
            else:
                bbox = det_bboxes[i, :4]
                area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                if area >= min_area and area < max_area:
                    fp[k, i] = 1
    return tp, fp
|
|
|
|
|
def tpfp_default(det_bboxes,
                 gt_bboxes,
                 gt_bboxes_ignore=None,
                 iou_thr=0.5,
                 area_ranges=None):
    """Check if detected bboxes are true positive or false positive.

    Args:
        det_bboxes (ndarray): Detected bboxes of this image, of shape (m, 5).
            The last column is used as the score for ranking.
        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).
        gt_bboxes_ignore (ndarray | None): Ignored gt bboxes of this image,
            of shape (k, 4). ``None`` is treated as "no ignored boxes".
            Default: None
        iou_thr (float): IoU threshold to be considered as matched.
            Default: 0.5.
        area_ranges (list[tuple] | None): Range of bbox areas to be evaluated,
            in the format [(min1, max1), (min2, max2), ...]. Default: None.

    Returns:
        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of
            each array is (num_scales, m).
    """
    # The signature advertises None, so it must not crash on `.shape`.
    if gt_bboxes_ignore is None:
        gt_bboxes_ignore = np.empty((0, 4), dtype=gt_bboxes.dtype)
    # an indicator of ignored gts
    gt_ignore_inds = np.concatenate(
        (np.zeros(gt_bboxes.shape[0], dtype=bool),
         np.ones(gt_bboxes_ignore.shape[0], dtype=bool)))
    # stack gt_bboxes and gt_bboxes_ignore for convenience
    gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))

    num_dets = det_bboxes.shape[0]
    num_gts = gt_bboxes.shape[0]
    if area_ranges is None:
        area_ranges = [(None, None)]
    num_scales = len(area_ranges)
    # tp and fp are of shape (num_scales, num_dets), each row is tp or fp of
    # a certain scale
    tp = np.zeros((num_scales, num_dets), dtype=np.float32)
    fp = np.zeros((num_scales, num_dets), dtype=np.float32)

    # if there is no gt bboxes in this image, then all det bboxes
    # within area range are false positives
    if gt_bboxes.shape[0] == 0:
        if area_ranges == [(None, None)]:
            fp[...] = 1
        else:
            det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0]) * (
                det_bboxes[:, 3] - det_bboxes[:, 1])
            for i, (min_area, max_area) in enumerate(area_ranges):
                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
        return tp, fp

    ious = bbox_overlaps(det_bboxes, gt_bboxes)
    # for each det, the max iou with all gts
    ious_max = ious.max(axis=1)
    # for each det, which gt overlaps most with it
    ious_argmax = ious.argmax(axis=1)
    # sort all dets in descending order by scores
    sort_inds = np.argsort(-det_bboxes[:, -1])
    # gt areas do not depend on the scale, so compute them once
    gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (
        gt_bboxes[:, 3] - gt_bboxes[:, 1])
    for k, (min_area, max_area) in enumerate(area_ranges):
        gt_covered = np.zeros(num_gts, dtype=bool)
        # if no area range is specified, gt_area_ignore is all False
        if min_area is None:
            gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)
        else:
            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
        for i in sort_inds:
            if ious_max[i] >= iou_thr:
                matched_gt = ious_argmax[i]
                if not (gt_ignore_inds[matched_gt]
                        or gt_area_ignore[matched_gt]):
                    # first det to claim a gt is a tp, later ones are fp
                    if not gt_covered[matched_gt]:
                        gt_covered[matched_gt] = True
                        tp[k, i] = 1
                    else:
                        fp[k, i] = 1
                # otherwise ignore this detected bbox, tp = 0, fp = 0
            elif min_area is None:
                fp[k, i] = 1
            else:
                bbox = det_bboxes[i, :4]
                area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                if area >= min_area and area < max_area:
                    fp[k, i] = 1
    return tp, fp
|
|
|
|
|
def get_cls_results(det_results, annotations, class_id):
    """Get det results and gt information of a certain class.

    Args:
        det_results (list[list]): Same as `eval_map()`.
        annotations (list[dict]): Same as `eval_map()`. Labels and bboxes
            may be given as array-likes; they are converted with
            ``np.asarray`` before indexing.
        class_id (int): ID of a specific class.

    Returns:
        tuple[list[np.ndarray]]: detected bboxes, gt bboxes, ignored gt bboxes.
            Each list has one entry per image.
    """
    cls_dets = [img_res[class_id] for img_res in det_results]
    cls_gts = []
    cls_gts_ignore = []
    for ann in annotations:
        # np.asarray guarantees an element-wise comparison; a plain list
        # compared with an int would yield a single scalar False instead.
        gt_inds = np.asarray(ann['labels']) == class_id
        cls_gts.append(np.asarray(ann['bboxes'])[gt_inds, :])

        labels_ignore = ann.get('labels_ignore')
        if labels_ignore is not None:
            ignore_inds = np.asarray(labels_ignore) == class_id
            cls_gts_ignore.append(
                np.asarray(ann['bboxes_ignore'])[ignore_inds, :])
        else:
            # keep the per-image lists aligned even without ignore labels
            cls_gts_ignore.append(np.empty((0, 4), dtype=np.float32))

    return cls_dets, cls_gts, cls_gts_ignore
|
|
|
|
|
def eval_map(det_results,
             annotations,
             scale_ranges=None,
             iou_thr=0.5,
             dataset=None,
             logger=None,
             tpfp_fn=None,
             nproc=4):
    """Evaluate mAP of a dataset.

    Args:
        det_results (list[list]): [[cls1_det, cls2_det, ...], ...].
            The outer list indicates images, and the inner list indicates
            per-class detected bboxes.
        annotations (list[dict]): Ground truth annotations where each item of
            the list indicates an image. Keys of annotations are:

            - `bboxes`: numpy array of shape (n, 4)
            - `labels`: numpy array of shape (n, )
            - `bboxes_ignore` (optional): numpy array of shape (k, 4)
            - `labels_ignore` (optional): numpy array of shape (k, )
        scale_ranges (list[tuple] | None): Range of scales to be evaluated,
            in the format [(min1, max1), (min2, max2), ...]. A range of
            (32, 64) means the area range between (32**2, 64**2).
            Default: None.
        iou_thr (float): IoU threshold to be considered as matched.
            Default: 0.5.
        dataset (list[str] | str | None): Dataset name or dataset classes,
            there are minor differences in metrics for different datasets,
            e.g. "voc07", "imagenet_det", etc. Default: None.
        logger (logging.Logger | str | None): The way to print the mAP
            summary. See `mmcv.utils.print_log()` for details. Default: None.
        tpfp_fn (callable | None): The function used to determine true/
            false positives. If None, :func:`tpfp_default` is used as default
            unless dataset is 'det' or 'vid' (:func:`tpfp_imagenet` in this
            case). If it is given as a function, then this function is used
            to evaluate tp & fp. Default None.
        nproc (int): Processes used for computing TP and FP.
            Default: 4.

    Returns:
        tuple: (mAP, [dict, dict, ...]). mAP is a float when ``scale_ranges``
            is None, otherwise a list with one value per scale. Each dict
            holds 'num_gts', 'num_dets', 'recall', 'precision' and 'ap' of
            one class.

    Raises:
        ValueError: If ``tpfp_fn`` is neither None nor callable.
    """
    assert len(det_results) == len(annotations)

    num_imgs = len(det_results)
    num_scales = len(scale_ranges) if scale_ranges is not None else 1
    num_classes = len(det_results[0])  # positive class num
    area_ranges = ([(rg[0]**2, rg[1]**2) for rg in scale_ranges]
                   if scale_ranges is not None else None)

    # choose proper function according to datasets to compute tp and fp;
    # this does not depend on the class, so resolve it once before any
    # worker process is started
    if tpfp_fn is None:
        if dataset in ['det', 'vid']:
            tpfp_fn = tpfp_imagenet
        else:
            tpfp_fn = tpfp_default
    if not callable(tpfp_fn):
        raise ValueError(
            f'tpfp_fn has to be a function or None, but got {tpfp_fn}')

    eval_results = []
    # the context manager releases the workers even if a class fails
    with Pool(nproc) as pool:
        for i in range(num_classes):
            # get gt and det bboxes of this class
            cls_dets, cls_gts, cls_gts_ignore = get_cls_results(
                det_results, annotations, i)

            # compute tp and fp for each image with multiple processes
            tpfp = pool.starmap(
                tpfp_fn,
                zip(cls_dets, cls_gts, cls_gts_ignore,
                    [iou_thr] * num_imgs, [area_ranges] * num_imgs))
            tp, fp = tuple(zip(*tpfp))
            # calculate gt number of each scale
            # ignored gts or gts beyond the specific scale are not counted
            num_gts = np.zeros(num_scales, dtype=int)
            for bbox in cls_gts:
                if area_ranges is None:
                    num_gts[0] += bbox.shape[0]
                else:
                    gt_areas = (bbox[:, 2] - bbox[:, 0]) * (
                        bbox[:, 3] - bbox[:, 1])
                    for k, (min_area, max_area) in enumerate(area_ranges):
                        num_gts[k] += np.sum((gt_areas >= min_area)
                                             & (gt_areas < max_area))
            # sort all det bboxes by score, also sort tp and fp
            cls_dets = np.vstack(cls_dets)
            num_dets = cls_dets.shape[0]
            sort_inds = np.argsort(-cls_dets[:, -1])
            tp = np.hstack(tp)[:, sort_inds]
            fp = np.hstack(fp)[:, sort_inds]
            # calculate recall and precision with tp and fp
            tp = np.cumsum(tp, axis=1)
            fp = np.cumsum(fp, axis=1)
            # eps guards the divisions when there are no gts / no dets
            eps = np.finfo(np.float32).eps
            recalls = tp / np.maximum(num_gts[:, np.newaxis], eps)
            precisions = tp / np.maximum((tp + fp), eps)
            # calculate AP
            if scale_ranges is None:
                recalls = recalls[0, :]
                precisions = precisions[0, :]
                num_gts = num_gts.item()
            mode = 'area' if dataset != 'voc07' else '11points'
            ap = average_precision(recalls, precisions, mode)
            eval_results.append({
                'num_gts': num_gts,
                'num_dets': num_dets,
                'recall': recalls,
                'precision': precisions,
                'ap': ap
            })

    if scale_ranges is not None:
        # shape (num_classes, num_scales)
        all_ap = np.vstack([cls_result['ap'] for cls_result in eval_results])
        all_num_gts = np.vstack(
            [cls_result['num_gts'] for cls_result in eval_results])
        mean_ap = []
        for i in range(num_scales):
            # classes without any gt in this scale are excluded from the mean
            if np.any(all_num_gts[:, i] > 0):
                mean_ap.append(all_ap[all_num_gts[:, i] > 0, i].mean())
            else:
                mean_ap.append(0.0)
    else:
        aps = []
        for cls_result in eval_results:
            if cls_result['num_gts'] > 0:
                aps.append(cls_result['ap'])
        mean_ap = np.array(aps).mean().item() if aps else 0.0

    # NOTE(review): the squared area ranges (not the raw scale ranges) are
    # what gets printed in the per-scale headers of the summary.
    print_map_summary(
        mean_ap, eval_results, dataset, area_ranges, logger=logger)

    return mean_ap, eval_results
|
|
|
|
|
def print_map_summary(mean_ap,
                      results,
                      dataset=None,
                      scale_ranges=None,
                      logger=None):
    """Print mAP and results of each class.

    A table will be printed to show the gts/dets/recall/AP of each class and
    the mAP. One table is printed per scale.

    Args:
        mean_ap (float | list[float]): Calculated from `eval_map()`. A list
            holds one value per scale.
        results (list[dict]): Calculated from `eval_map()`.
        dataset (list[str] | str | None): Dataset name or dataset classes.
        scale_ranges (list[tuple] | None): Ranges printed as the header of
            each per-scale table; must have one entry per scale.
        logger (logging.Logger | str | None): The way to print the mAP
            summary. See `mmcv.utils.print_log()` for details. The string
            'silent' suppresses all output. Default: None.
    """

    if logger == 'silent':
        return

    # multi-scale results store 'ap' as an array with one entry per scale;
    # an empty result list is treated as a single scale instead of crashing
    if results and isinstance(results[0]['ap'], np.ndarray):
        num_scales = len(results[0]['ap'])
    else:
        num_scales = 1

    if scale_ranges is not None:
        assert len(scale_ranges) == num_scales

    num_classes = len(results)

    recalls = np.zeros((num_scales, num_classes), dtype=np.float32)
    aps = np.zeros((num_scales, num_classes), dtype=np.float32)
    num_gts = np.zeros((num_scales, num_classes), dtype=int)
    for i, cls_result in enumerate(results):
        if cls_result['recall'].size > 0:
            # the last cumulative recall value is the recall over all dets
            recalls[:, i] = np.array(cls_result['recall'], ndmin=2)[:, -1]
        aps[:, i] = cls_result['ap']
        num_gts[:, i] = cls_result['num_gts']

    if dataset is None:
        label_names = [str(i) for i in range(num_classes)]
    elif mmcv.is_str(dataset):
        label_names = get_classes(dataset)
    else:
        label_names = dataset

    if not isinstance(mean_ap, list):
        mean_ap = [mean_ap]

    header = ['class', 'gts', 'dets', 'recall', 'ap']
    for i in range(num_scales):
        if scale_ranges is not None:
            print_log(f'Scale range {scale_ranges[i]}', logger=logger)
        table_data = [header]
        for j in range(num_classes):
            row_data = [
                label_names[j], num_gts[i, j], results[j]['num_dets'],
                f'{recalls[i, j]:.3f}', f'{aps[i, j]:.3f}'
            ]
            table_data.append(row_data)
        table_data.append(['mAP', '', '', '', f'{mean_ap[i]:.3f}'])
        table = AsciiTable(table_data)
        table.inner_footing_row_border = True
        print_log('\n' + table.table, logger=logger)
|
|
|